You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/05/16 13:43:51 UTC

svn commit: r1103699 - in /lucene/dev/branches/docvalues/lucene/src: java/org/apache/lucene/document/ java/org/apache/lucene/index/ java/org/apache/lucene/index/codecs/ java/org/apache/lucene/index/codecs/preflex/ test-framework/org/apache/lucene/index/

Author: simonw
Date: Mon May 16 11:43:51 2011
New Revision: 1103699

URL: http://svn.apache.org/viewvc?rev=1103699&view=rev
Log:
LUCENE-3070: Added UOE to PreFlex Codec, Added Random DocValues injection to RandomIndexWriter, Added basic DocValues verification to CheckIndex

Modified:
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java
    lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java
    lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java Mon May 16 11:43:51 2011
@@ -73,7 +73,6 @@ import org.apache.lucene.util.BytesRef;
  * </pre>
  * 
  * */
-@SuppressWarnings("serial")
 public class DocValuesField extends AbstractField implements PerDocFieldValues {
 
   protected BytesRef bytes;

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java Mon May 16 11:43:51 2011
@@ -27,6 +27,9 @@ import org.apache.lucene.document.Abstra
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
+import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.DocValuesEnum;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 
@@ -195,6 +198,9 @@ public class CheckIndex {
 
       /** Status for testing of term vectors (null if term vectors could not be tested). */
       public TermVectorStatus termVectorStatus;
+      
+      /** Status for testing of DocValues (null if DocValues could not be tested). */
+      public DocValuesStatus docValuesStatus;
     }
 
     /**
@@ -254,6 +260,15 @@ public class CheckIndex {
       /** Exception thrown during term vector test (null on success) */
       public Throwable error = null;
     }
+    
+    public static final class DocValuesStatus {
+      /** Number of documents tested. */
+      public int docCount;
+      /** Total number of docValues tested. */
+      public long totalValueFields;
+      /** Exception thrown during doc values test (null on success) */
+      public Throwable error = null;
+    }
   }
 
   /** Create a new CheckIndex on the directory. */
@@ -499,6 +514,8 @@ public class CheckIndex {
 
         // Test Term Vectors
         segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);
+        
+        segInfoStat.docValuesStatus = testDocValues(info, reader);
 
         // Rethrow the first exception we encountered
         //  This will cause stats for failed segments to be incremented properly
@@ -510,6 +527,8 @@ public class CheckIndex {
           throw new RuntimeException("Stored Field test failed");
         } else if (segInfoStat.termVectorStatus.error != null) {
           throw new RuntimeException("Term Vector test failed");
+        }  else if (segInfoStat.docValuesStatus.error != null) {
+          throw new RuntimeException("DocValues test failed");
         }
 
         msg("");
@@ -920,6 +939,60 @@ public class CheckIndex {
 
     return status;
   }
+  
+  private Status.DocValuesStatus testDocValues(SegmentInfo info,
+      SegmentReader reader) {
+    final Status.DocValuesStatus status = new Status.DocValuesStatus();
+    try {
+      if (infoStream != null) {
+        infoStream.print("    test: DocValues........");
+      }
+      final FieldInfos fieldInfos = info.getFieldInfos();
+      for (FieldInfo fieldInfo : fieldInfos) {
+        if (fieldInfo.hasDocValues()) {
+          status.totalValueFields++;
+          final PerDocValues perDocValues = reader.perDocValues();
+          final DocValues docValues = perDocValues.docValues(fieldInfo.name);
+          if (docValues == null) {
+            continue;
+          }
+          final DocValuesEnum values = docValues.getEnum();
+          while (values.nextDoc() != DocValuesEnum.NO_MORE_DOCS) {
+            switch (fieldInfo.docValues) {
+            case BYTES_FIXED_DEREF:
+            case BYTES_FIXED_SORTED:
+            case BYTES_FIXED_STRAIGHT:
+            case BYTES_VAR_DEREF:
+            case BYTES_VAR_SORTED:
+            case BYTES_VAR_STRAIGHT:
+              values.bytes();
+              break;
+            case FLOAT_32:
+            case FLOAT_64:
+              values.getFloat();
+              break;
+            case INTS:
+              values.getInt();
+              break;
+            default:
+              throw new IllegalArgumentException("Field: " + fieldInfo.name
+                  + " - no such DocValues type: " + fieldInfo.docValues);
+            }
+          }
+        }
+      }
+
+      msg("OK [" + status.docCount + " total doc Count; Num DocValues Fields "
+          + status.totalValueFields);
+    } catch (Throwable e) {
+      msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+      status.error = e;
+      if (infoStream != null) {
+        e.printStackTrace(infoStream);
+      }
+    }
+    return status;
+  }
 
   /**
    * Test term vectors for a segment.

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java Mon May 16 11:43:51 2011
@@ -251,7 +251,7 @@ final class DocFieldProcessor extends Do
         fieldsWriter.addField(field, fp.fieldInfo);
       }
       if (field.hasDocValues()) {
-        final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo, fieldInfos);
+        final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo);
         docValuesConsumer.add(docState.docID, field.getDocValues());
       }
     }
@@ -292,7 +292,7 @@ final class DocFieldProcessor extends Do
   final private Map<String, DocValuesConsumer> docValues = new HashMap<String, DocValuesConsumer>();
   final private Map<Integer, PerDocConsumer> perDocConsumers = new HashMap<Integer, PerDocConsumer>();
 
-  DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo, FieldInfos infos) 
+  DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo) 
       throws IOException {
     DocValuesConsumer docValuesConsumer = docValues.get(fieldInfo.name);
     if (docValuesConsumer != null) {
@@ -303,12 +303,12 @@ final class DocFieldProcessor extends Do
       PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(fieldInfo.getCodecId());
       SegmentCodecs codecs = perDocWriteState.segmentCodecs;
       assert codecs.codecs.length > fieldInfo.getCodecId();
-      
       Codec codec = codecs.codecs[fieldInfo.getCodecId()];
       perDocConsumer = codec.docsConsumer(perDocWriteState);
       perDocConsumers.put(Integer.valueOf(fieldInfo.getCodecId()), perDocConsumer);
     }
     docValuesConsumer = perDocConsumer.addValuesField(fieldInfo);
+    fieldInfo.commitDocValues();
     docValues.put(fieldInfo.name, docValuesConsumer);
     return docValuesConsumer;
   }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java Mon May 16 11:43:51 2011
@@ -127,6 +127,7 @@ public final class FieldInfo {
   }
   
   private boolean vectorsCommitted;
+  private boolean docValuesCommitted;
  
   /**
    * Reverts all uncommitted changes on this {@link FieldInfo}
@@ -138,6 +139,10 @@ public final class FieldInfo {
       storePositionWithTermVector = false;
       storeTermVector = false;  
     }
+    
+    if (docValues != null && !docValuesCommitted) {
+      docValues = null;
+    }
   }
 
   /**
@@ -150,4 +155,9 @@ public final class FieldInfo {
     assert storeTermVector;
     vectorsCommitted = true;
   }
+  
+  void commitDocValues() {
+    assert hasDocValues();
+    docValuesCommitted = true;
+  }
 }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java Mon May 16 11:43:51 2011
@@ -750,5 +750,5 @@ public final class FieldInfos implements
     }
     return roFis;
   }
-
+  
 }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java Mon May 16 11:43:51 2011
@@ -317,22 +317,22 @@ final class PerFieldCodecWrapper extends
   }
   
   private final class PerDocConsumers extends PerDocConsumer {
-    private final ArrayList<PerDocConsumer> consumers = new ArrayList<PerDocConsumer>();
+    private final PerDocConsumer[] consumers;
+    private final Codec[] codecs;
+    private final PerDocWriteState state;
 
     public PerDocConsumers(PerDocWriteState state) throws IOException {
       assert segmentCodecs == state.segmentCodecs;
-      final Codec[] codecs = segmentCodecs.codecs;
-      for (int i = 0; i < codecs.length; i++) {
-        consumers.add(codecs[i].docsConsumer(new PerDocWriteState(state, i)));
-      }
+      this.state = state;
+      codecs = segmentCodecs.codecs;
+      consumers = new PerDocConsumer[codecs.length];
     }
 
     public void close() throws IOException {
-      Iterator<PerDocConsumer> it = consumers.iterator();
       IOException err = null;
-      while (it.hasNext()) {
+      for (int i = 0; i < consumers.length; i++) {
         try {
-          PerDocConsumer next = it.next();
+          final PerDocConsumer next = consumers[i];
           if (next != null) {
             next.close();
           }
@@ -351,10 +351,13 @@ final class PerFieldCodecWrapper extends
 
     @Override
     public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
-      assert field.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
-      final PerDocConsumer perDoc = consumers.get(field.getCodecId());
+      final int codecId = field.getCodecId();
+      assert codecId != FieldInfo.UNASSIGNED_CODEC_ID;
+      PerDocConsumer perDoc = consumers[codecId];
       if (perDoc == null) {
-        return null;
+        perDoc = codecs[codecId].docsConsumer(new PerDocWriteState(state, codecId));
+        assert perDoc != null;
+        consumers[codecId] = perDoc;
       }
       return perDoc.addValuesField(field);
     }

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java Mon May 16 11:43:51 2011
@@ -77,6 +77,7 @@ public class DefaultDocValuesConsumer ex
                 Writer.INDEX_EXTENSION));
             assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
                 Writer.INDEX_EXTENSION));
+            // until here all types use an index
           case BYTES_FIXED_STRAIGHT:
           case FLOAT_32:
           case FLOAT_64:

Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java Mon May 16 11:43:51 2011
@@ -84,11 +84,11 @@ public class PreFlexCodec extends Codec 
 
   @Override
   public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
-    return null;
+    throw new UnsupportedOperationException("PerDocConsumer is not supported by Preflex codec");
   }
 
   @Override
   public PerDocValues docsProducer(SegmentReadState state) throws IOException {
-    return null;
+    throw new UnsupportedOperationException("PerDocValues is not supported by Preflex codec");
   }
 }

Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java Mon May 16 11:43:51 2011
@@ -23,9 +23,13 @@ import java.util.Random;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.DocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexWriter; // javadoc
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.values.Type;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util._TestUtil;
@@ -44,6 +48,10 @@ public class RandomIndexWriter implement
   int flushAt;
   private double flushAtFactor = 1.0;
   private boolean getReaderCalled;
+  private final int fixedBytesLength;
+  private final long docValuesFieldPrefix;
+  private volatile boolean doDocValues;
+  private CodecProvider codecProvider;
 
   // Randomly calls Thread.yield so we mixup thread scheduling
   private static final class MockIndexWriter extends IndexWriter {
@@ -91,16 +99,79 @@ public class RandomIndexWriter implement
       System.out.println("codec default=" + w.getConfig().getCodecProvider().getDefaultFieldCodec());
       w.setInfoStream(System.out);
     }
+    /* TODO: find some what to make that random...
+     * This must be fixed across all fixed bytes 
+     * fields in one index. so if you open another writer
+     * this might change if I use r.nextInt(x)
+     * maybe we can peek at the existing files here? 
+     */
+    fixedBytesLength = 37; 
+    docValuesFieldPrefix = r.nextLong();
+    codecProvider =  w.getConfig().getCodecProvider();
+    switchDoDocValues();
   } 
 
+  private void switchDoDocValues() {
+    // randomly enable / disable docValues 
+    doDocValues = r.nextInt(10) != 0;
+  }
+  
   /**
    * Adds a Document.
    * @see IndexWriter#addDocument(Document)
    */
   public void addDocument(Document doc) throws IOException {
+    if (doDocValues) {
+      randomPerDocFieldValues(r, doc);
+    }
     w.addDocument(doc);
+    
     maybeCommit();
   }
+  
+  private void randomPerDocFieldValues(Random random, Document doc) {
+    
+    Type[] values = Type.values();
+    Type type = values[random.nextInt(values.length)];
+    String name = "random_" + type.name() + "" + docValuesFieldPrefix;
+    if ("PreFlex".equals(codecProvider.getFieldCodec(name)) || doc.getFieldable(name) != null)
+        return;
+    DocValuesField docValuesField = new DocValuesField(name);
+    switch (type) {
+    case BYTES_FIXED_DEREF:
+    case BYTES_FIXED_SORTED:
+    case BYTES_FIXED_STRAIGHT:
+      final String randomUnicodeString = _TestUtil.randomUnicodeString(random, fixedBytesLength);
+      BytesRef fixedRef = new BytesRef(randomUnicodeString);
+      if (fixedRef.length > fixedBytesLength) {
+        fixedRef = new BytesRef(fixedRef.bytes, 0, fixedBytesLength);
+      } else {
+        fixedRef.grow(fixedBytesLength);
+        fixedRef.length = fixedBytesLength;
+      }
+      docValuesField.setBytes(fixedRef, type);
+      break;
+    case BYTES_VAR_DEREF:
+    case BYTES_VAR_SORTED:
+    case BYTES_VAR_STRAIGHT:
+      BytesRef ref = new BytesRef(_TestUtil.randomUnicodeString(random, 200));
+      docValuesField.setBytes(ref, type);
+      break;
+    case FLOAT_32:
+      docValuesField.setFloat(random.nextFloat());
+      break;
+    case FLOAT_64:
+      docValuesField.setFloat(random.nextDouble());
+      break;
+    case INTS:
+      docValuesField.setInt(random.nextInt());
+      break;
+    default:
+      throw new IllegalArgumentException("no such type: " + type);
+    }
+
+    doc.add(docValuesField);
+  }
 
   private void maybeCommit() throws IOException {
     if (docCount++ == flushAt) {
@@ -113,6 +184,7 @@ public class RandomIndexWriter implement
         // gradually but exponentially increase time b/w flushes
         flushAtFactor *= 1.05;
       }
+      switchDoDocValues();
     }
   }
   
@@ -121,6 +193,9 @@ public class RandomIndexWriter implement
    * @see IndexWriter#updateDocument(Term, Document)
    */
   public void updateDocument(Term t, Document doc) throws IOException {
+    if (doDocValues) {
+      randomPerDocFieldValues(r, doc);
+    }
     w.updateDocument(t, doc);
     maybeCommit();
   }
@@ -135,6 +210,7 @@ public class RandomIndexWriter implement
   
   public void commit() throws CorruptIndexException, IOException {
     w.commit();
+    switchDoDocValues();
   }
   
   public int numDocs() throws IOException {
@@ -164,6 +240,7 @@ public class RandomIndexWriter implement
       w.optimize(limit);
       assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
     }
+    switchDoDocValues();
   }
 
   public IndexReader getReader(boolean applyDeletions) throws IOException {
@@ -184,6 +261,7 @@ public class RandomIndexWriter implement
         System.out.println("RIW.getReader: open new reader");
       }
       w.commit();
+      switchDoDocValues();
       return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider());
     }
   }