You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/05/16 13:43:51 UTC
svn commit: r1103699 - in /lucene/dev/branches/docvalues/lucene/src:
java/org/apache/lucene/document/ java/org/apache/lucene/index/
java/org/apache/lucene/index/codecs/
java/org/apache/lucene/index/codecs/preflex/
test-framework/org/apache/lucene/index/
Author: simonw
Date: Mon May 16 11:43:51 2011
New Revision: 1103699
URL: http://svn.apache.org/viewvc?rev=1103699&view=rev
Log:
LUCENE-3070: Added UOE to PreFlex Codec, Added Random DocValues injection to RandomIndexWriter, Added basic DocValues verification to CheckIndex
Modified:
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java
lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java
lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/DocValuesField.java Mon May 16 11:43:51 2011
@@ -73,7 +73,6 @@ import org.apache.lucene.util.BytesRef;
* </pre>
*
* */
-@SuppressWarnings("serial")
public class DocValuesField extends AbstractField implements PerDocFieldValues {
protected BytesRef bytes;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CheckIndex.java Mon May 16 11:43:51 2011
@@ -27,6 +27,9 @@ import org.apache.lucene.document.Abstra
import org.apache.lucene.document.Document;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
+import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.index.values.DocValues;
+import org.apache.lucene.index.values.DocValuesEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -195,6 +198,9 @@ public class CheckIndex {
/** Status for testing of term vectors (null if term vectors could not be tested). */
public TermVectorStatus termVectorStatus;
+
+ /** Status for testing of DocValues (null if DocValues could not be tested). */
+ public DocValuesStatus docValuesStatus;
}
/**
@@ -254,6 +260,15 @@ public class CheckIndex {
/** Exception thrown during term vector test (null on success) */
public Throwable error = null;
}
+
+ public static final class DocValuesStatus {
+ /** Number of documents tested. */
+ public int docCount;
+ /** Total number of docValues tested. */
+ public long totalValueFields;
+ /** Exception thrown during doc values test (null on success) */
+ public Throwable error = null;
+ }
}
/** Create a new CheckIndex on the directory. */
@@ -499,6 +514,8 @@ public class CheckIndex {
// Test Term Vectors
segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);
+
+ segInfoStat.docValuesStatus = testDocValues(info, reader);
// Rethrow the first exception we encountered
// This will cause stats for failed segments to be incremented properly
@@ -510,6 +527,8 @@ public class CheckIndex {
throw new RuntimeException("Stored Field test failed");
} else if (segInfoStat.termVectorStatus.error != null) {
throw new RuntimeException("Term Vector test failed");
+ } else if (segInfoStat.docValuesStatus.error != null) {
+ throw new RuntimeException("DocValues test failed");
}
msg("");
@@ -920,6 +939,60 @@ public class CheckIndex {
return status;
}
+
+ private Status.DocValuesStatus testDocValues(SegmentInfo info,
+ SegmentReader reader) {
+ final Status.DocValuesStatus status = new Status.DocValuesStatus();
+ try {
+ if (infoStream != null) {
+ infoStream.print(" test: DocValues........");
+ }
+ final FieldInfos fieldInfos = info.getFieldInfos();
+ for (FieldInfo fieldInfo : fieldInfos) {
+ if (fieldInfo.hasDocValues()) {
+ status.totalValueFields++;
+ final PerDocValues perDocValues = reader.perDocValues();
+ final DocValues docValues = perDocValues.docValues(fieldInfo.name);
+ if (docValues == null) {
+ continue;
+ }
+ final DocValuesEnum values = docValues.getEnum();
+ while (values.nextDoc() != DocValuesEnum.NO_MORE_DOCS) {
+ switch (fieldInfo.docValues) {
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ values.bytes();
+ break;
+ case FLOAT_32:
+ case FLOAT_64:
+ values.getFloat();
+ break;
+ case INTS:
+ values.getInt();
+ break;
+ default:
+ throw new IllegalArgumentException("Field: " + fieldInfo.name
+ + " - no such DocValues type: " + fieldInfo.docValues);
+ }
+ }
+ }
+ }
+
+ msg("OK [" + status.docCount + " total doc Count; Num DocValues Fields "
+ + status.totalValueFields);
+ } catch (Throwable e) {
+ msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+ status.error = e;
+ if (infoStream != null) {
+ e.printStackTrace(infoStream);
+ }
+ }
+ return status;
+ }
/**
* Test term vectors for a segment.
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java Mon May 16 11:43:51 2011
@@ -251,7 +251,7 @@ final class DocFieldProcessor extends Do
fieldsWriter.addField(field, fp.fieldInfo);
}
if (field.hasDocValues()) {
- final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo, fieldInfos);
+ final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo);
docValuesConsumer.add(docState.docID, field.getDocValues());
}
}
@@ -292,7 +292,7 @@ final class DocFieldProcessor extends Do
final private Map<String, DocValuesConsumer> docValues = new HashMap<String, DocValuesConsumer>();
final private Map<Integer, PerDocConsumer> perDocConsumers = new HashMap<Integer, PerDocConsumer>();
- DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo, FieldInfos infos)
+ DocValuesConsumer docValuesConsumer(DocState docState, FieldInfo fieldInfo)
throws IOException {
DocValuesConsumer docValuesConsumer = docValues.get(fieldInfo.name);
if (docValuesConsumer != null) {
@@ -303,12 +303,12 @@ final class DocFieldProcessor extends Do
PerDocWriteState perDocWriteState = docState.docWriter.newPerDocWriteState(fieldInfo.getCodecId());
SegmentCodecs codecs = perDocWriteState.segmentCodecs;
assert codecs.codecs.length > fieldInfo.getCodecId();
-
Codec codec = codecs.codecs[fieldInfo.getCodecId()];
perDocConsumer = codec.docsConsumer(perDocWriteState);
perDocConsumers.put(Integer.valueOf(fieldInfo.getCodecId()), perDocConsumer);
}
docValuesConsumer = perDocConsumer.addValuesField(fieldInfo);
+ fieldInfo.commitDocValues();
docValues.put(fieldInfo.name, docValuesConsumer);
return docValuesConsumer;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java Mon May 16 11:43:51 2011
@@ -127,6 +127,7 @@ public final class FieldInfo {
}
private boolean vectorsCommitted;
+ private boolean docValuesCommitted;
/**
* Reverts all uncommitted changes on this {@link FieldInfo}
@@ -138,6 +139,10 @@ public final class FieldInfo {
storePositionWithTermVector = false;
storeTermVector = false;
}
+
+ if (docValues != null && !docValuesCommitted) {
+ docValues = null;
+ }
}
/**
@@ -150,4 +155,9 @@ public final class FieldInfo {
assert storeTermVector;
vectorsCommitted = true;
}
+
+ void commitDocValues() {
+ assert hasDocValues();
+ docValuesCommitted = true;
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java Mon May 16 11:43:51 2011
@@ -750,5 +750,5 @@ public final class FieldInfos implements
}
return roFis;
}
-
+
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/PerFieldCodecWrapper.java Mon May 16 11:43:51 2011
@@ -317,22 +317,22 @@ final class PerFieldCodecWrapper extends
}
private final class PerDocConsumers extends PerDocConsumer {
- private final ArrayList<PerDocConsumer> consumers = new ArrayList<PerDocConsumer>();
+ private final PerDocConsumer[] consumers;
+ private final Codec[] codecs;
+ private final PerDocWriteState state;
public PerDocConsumers(PerDocWriteState state) throws IOException {
assert segmentCodecs == state.segmentCodecs;
- final Codec[] codecs = segmentCodecs.codecs;
- for (int i = 0; i < codecs.length; i++) {
- consumers.add(codecs[i].docsConsumer(new PerDocWriteState(state, i)));
- }
+ this.state = state;
+ codecs = segmentCodecs.codecs;
+ consumers = new PerDocConsumer[codecs.length];
}
public void close() throws IOException {
- Iterator<PerDocConsumer> it = consumers.iterator();
IOException err = null;
- while (it.hasNext()) {
+ for (int i = 0; i < consumers.length; i++) {
try {
- PerDocConsumer next = it.next();
+ final PerDocConsumer next = consumers[i];
if (next != null) {
next.close();
}
@@ -351,10 +351,13 @@ final class PerFieldCodecWrapper extends
@Override
public DocValuesConsumer addValuesField(FieldInfo field) throws IOException {
- assert field.getCodecId() != FieldInfo.UNASSIGNED_CODEC_ID;
- final PerDocConsumer perDoc = consumers.get(field.getCodecId());
+ final int codecId = field.getCodecId();
+ assert codecId != FieldInfo.UNASSIGNED_CODEC_ID;
+ PerDocConsumer perDoc = consumers[codecId];
if (perDoc == null) {
- return null;
+ perDoc = codecs[codecId].docsConsumer(new PerDocWriteState(state, codecId));
+ assert perDoc != null;
+ consumers[codecId] = perDoc;
}
return perDoc.addValuesField(field);
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/DefaultDocValuesConsumer.java Mon May 16 11:43:51 2011
@@ -77,6 +77,7 @@ public class DefaultDocValuesConsumer ex
Writer.INDEX_EXTENSION));
assert dir.fileExists(IndexFileNames.segmentFileName(filename, "",
Writer.INDEX_EXTENSION));
+ // until here all types use an index
case BYTES_FIXED_STRAIGHT:
case FLOAT_32:
case FLOAT_64:
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/codecs/preflex/PreFlexCodec.java Mon May 16 11:43:51 2011
@@ -84,11 +84,11 @@ public class PreFlexCodec extends Codec
@Override
public PerDocConsumer docsConsumer(PerDocWriteState state) throws IOException {
- return null;
+ throw new UnsupportedOperationException("PerDocConsumer is not supported by Preflex codec");
}
@Override
public PerDocValues docsProducer(SegmentReadState state) throws IOException {
- return null;
+ throw new UnsupportedOperationException("PerDocValues is not supported by Preflex codec");
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java?rev=1103699&r1=1103698&r2=1103699&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java Mon May 16 11:43:51 2011
@@ -23,9 +23,13 @@ import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.DocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter; // javadoc
+import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.values.Type;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
import org.apache.lucene.util._TestUtil;
@@ -44,6 +48,10 @@ public class RandomIndexWriter implement
int flushAt;
private double flushAtFactor = 1.0;
private boolean getReaderCalled;
+ private final int fixedBytesLength;
+ private final long docValuesFieldPrefix;
+ private volatile boolean doDocValues;
+ private CodecProvider codecProvider;
// Randomly calls Thread.yield so we mixup thread scheduling
private static final class MockIndexWriter extends IndexWriter {
@@ -91,16 +99,79 @@ public class RandomIndexWriter implement
System.out.println("codec default=" + w.getConfig().getCodecProvider().getDefaultFieldCodec());
w.setInfoStream(System.out);
}
+ /* TODO: find some way to make that random...
+ * This must be fixed across all fixed bytes
+ * fields in one index, so if you open another writer
+ * this might change if I use r.nextInt(x).
+ * Maybe we can peek at the existing files here?
+ */
+ fixedBytesLength = 37;
+ docValuesFieldPrefix = r.nextLong();
+ codecProvider = w.getConfig().getCodecProvider();
+ switchDoDocValues();
}
+ private void switchDoDocValues() {
+ // randomly enable / disable docValues
+ doDocValues = r.nextInt(10) != 0;
+ }
+
/**
* Adds a Document.
* @see IndexWriter#addDocument(Document)
*/
public void addDocument(Document doc) throws IOException {
+ if (doDocValues) {
+ randomPerDocFieldValues(r, doc);
+ }
w.addDocument(doc);
+
maybeCommit();
}
+
+ private void randomPerDocFieldValues(Random random, Document doc) {
+
+ Type[] values = Type.values();
+ Type type = values[random.nextInt(values.length)];
+ String name = "random_" + type.name() + "" + docValuesFieldPrefix;
+ if ("PreFlex".equals(codecProvider.getFieldCodec(name)) || doc.getFieldable(name) != null)
+ return;
+ DocValuesField docValuesField = new DocValuesField(name);
+ switch (type) {
+ case BYTES_FIXED_DEREF:
+ case BYTES_FIXED_SORTED:
+ case BYTES_FIXED_STRAIGHT:
+ final String randomUnicodeString = _TestUtil.randomUnicodeString(random, fixedBytesLength);
+ BytesRef fixedRef = new BytesRef(randomUnicodeString);
+ if (fixedRef.length > fixedBytesLength) {
+ fixedRef = new BytesRef(fixedRef.bytes, 0, fixedBytesLength);
+ } else {
+ fixedRef.grow(fixedBytesLength);
+ fixedRef.length = fixedBytesLength;
+ }
+ docValuesField.setBytes(fixedRef, type);
+ break;
+ case BYTES_VAR_DEREF:
+ case BYTES_VAR_SORTED:
+ case BYTES_VAR_STRAIGHT:
+ BytesRef ref = new BytesRef(_TestUtil.randomUnicodeString(random, 200));
+ docValuesField.setBytes(ref, type);
+ break;
+ case FLOAT_32:
+ docValuesField.setFloat(random.nextFloat());
+ break;
+ case FLOAT_64:
+ docValuesField.setFloat(random.nextDouble());
+ break;
+ case INTS:
+ docValuesField.setInt(random.nextInt());
+ break;
+ default:
+ throw new IllegalArgumentException("no such type: " + type);
+ }
+
+ doc.add(docValuesField);
+ }
private void maybeCommit() throws IOException {
if (docCount++ == flushAt) {
@@ -113,6 +184,7 @@ public class RandomIndexWriter implement
// gradually but exponentially increase time b/w flushes
flushAtFactor *= 1.05;
}
+ switchDoDocValues();
}
}
@@ -121,6 +193,9 @@ public class RandomIndexWriter implement
* @see IndexWriter#updateDocument(Term, Document)
*/
public void updateDocument(Term t, Document doc) throws IOException {
+ if (doDocValues) {
+ randomPerDocFieldValues(r, doc);
+ }
w.updateDocument(t, doc);
maybeCommit();
}
@@ -135,6 +210,7 @@ public class RandomIndexWriter implement
public void commit() throws CorruptIndexException, IOException {
w.commit();
+ switchDoDocValues();
}
public int numDocs() throws IOException {
@@ -164,6 +240,7 @@ public class RandomIndexWriter implement
w.optimize(limit);
assert w.getSegmentCount() <= limit: "limit=" + limit + " actual=" + w.getSegmentCount();
}
+ switchDoDocValues();
}
public IndexReader getReader(boolean applyDeletions) throws IOException {
@@ -184,6 +261,7 @@ public class RandomIndexWriter implement
System.out.println("RIW.getReader: open new reader");
}
w.commit();
+ switchDoDocValues();
return IndexReader.open(w.getDirectory(), new KeepOnlyLastCommitDeletionPolicy(), r.nextBoolean(), _TestUtil.nextInt(r, 1, 10), w.getConfig().getCodecProvider());
}
}