You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/06/11 17:55:45 UTC
svn commit: r1348919 - in /lucene/dev/branches/branch_4x/lucene:
core/src/java/org/apache/lucene/codecs/lucene40/
core/src/java/org/apache/lucene/codecs/pulsing/
core/src/test/org/apache/lucene/codecs/lucene40/
test-framework/src/java/org/apache/lucene...
Author: rmuir
Date: Mon Jun 11 15:55:44 2012
New Revision: 1348919
URL: http://svn.apache.org/viewvc?rev=1348919&view=rev
Log:
LUCENE-4129: add codecheader to .frq/.prx
Added:
lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestAllFilesHaveCodecHeader.java (with props)
Modified:
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java?rev=1348919&r1=1348918&r2=1348919&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsFormat.java Mon Jun 11 15:55:44 2012
@@ -159,7 +159,8 @@ import org.apache.lucene.util.fst.FST; /
* with the frequency of the term in that document (except when frequencies are
* omitted: {@link IndexOptions#DOCS_ONLY}).</p>
* <ul>
- * <li>FreqFile (.frq) --&gt; &lt;TermFreqs, SkipData&gt; <sup>TermCount</sup></li>
+ * <li>FreqFile (.frq) --&gt; Header, &lt;TermFreqs, SkipData&gt; <sup>TermCount</sup></li>
+ * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>TermFreqs --&gt; &lt;TermFreq&gt; <sup>DocFreq</sup></li>
* <li>TermFreq --&gt; DocDelta[, Freq?]</li>
* <li>SkipData --&gt; &lt;&lt;SkipLevelLength, SkipLevel&gt;
@@ -232,7 +233,8 @@ import org.apache.lucene.util.fst.FST; /
* anything into this file, and if all fields in the index omit positional data
* then the .prx file will not exist.</p>
* <ul>
- * <li>ProxFile (.prx) --&gt; &lt;TermPositions&gt; <sup>TermCount</sup></li>
+ * <li>ProxFile (.prx) --&gt; Header, &lt;TermPositions&gt; <sup>TermCount</sup></li>
+ * <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>TermPositions --&gt; &lt;Positions&gt; <sup>DocFreq</sup></li>
* <li>Positions --&gt; &lt;PositionDelta,PayloadLength?,OffsetDelta?,OffsetLength?,PayloadData?&gt; <sup>Freq</sup></li>
* <li>PositionDelta,OffsetDelta,OffsetLength,PayloadLength --&gt; {@link DataOutput#writeVInt VInt}</li>
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java?rev=1348919&r1=1348918&r2=1348919&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsReader.java Mon Jun 11 15:55:44 2012
@@ -37,6 +37,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
/**
* Concrete class that reads the 4.0 frq/prox
@@ -58,29 +59,35 @@ public class Lucene40PostingsReader exte
// private String segment;
public Lucene40PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, String segmentSuffix) throws IOException {
- freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION),
+ boolean success = false;
+ IndexInput freqIn = null;
+ IndexInput proxIn = null;
+ try {
+ freqIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION),
ioContext);
- // TODO: hasProx should (somehow!) become codec private,
- // but it's tricky because 1) FIS.hasProx is global (it
- // could be all fields that have prox are written by a
- // different codec), 2) the field may have had prox in
- // the past but all docs w/ that field were deleted.
- // Really we'd need to init prxOut lazily on write, and
- // then somewhere record that we actually wrote it so we
- // know whether to open on read:
- if (fieldInfos.hasProx()) {
- boolean success = false;
- try {
+ CodecUtil.checkHeader(freqIn, Lucene40PostingsWriter.FRQ_CODEC, Lucene40PostingsWriter.VERSION_START,Lucene40PostingsWriter.VERSION_START);
+ // TODO: hasProx should (somehow!) become codec private,
+ // but it's tricky because 1) FIS.hasProx is global (it
+ // could be all fields that have prox are written by a
+ // different codec), 2) the field may have had prox in
+ // the past but all docs w/ that field were deleted.
+ // Really we'd need to init prxOut lazily on write, and
+ // then somewhere record that we actually wrote it so we
+ // know whether to open on read:
+ if (fieldInfos.hasProx()) {
proxIn = dir.openInput(IndexFileNames.segmentFileName(segmentInfo.name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION),
- ioContext);
- success = true;
- } finally {
- if (!success) {
- freqIn.close();
- }
+ ioContext);
+ CodecUtil.checkHeader(proxIn, Lucene40PostingsWriter.PRX_CODEC, Lucene40PostingsWriter.VERSION_START,Lucene40PostingsWriter.VERSION_START);
+ } else {
+ proxIn = null;
+ }
+ this.freqIn = freqIn;
+ this.proxIn = proxIn;
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(freqIn, proxIn);
}
- } else {
- proxIn = null;
}
}
@@ -88,7 +95,7 @@ public class Lucene40PostingsReader exte
public void init(IndexInput termsIn) throws IOException {
// Make sure we are talking to the matching past writer
- CodecUtil.checkHeader(termsIn, Lucene40PostingsWriter.CODEC,
+ CodecUtil.checkHeader(termsIn, Lucene40PostingsWriter.TERMS_CODEC,
Lucene40PostingsWriter.VERSION_START, Lucene40PostingsWriter.VERSION_START);
skipInterval = termsIn.readInt();
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java?rev=1348919&r1=1348918&r2=1348919&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40PostingsWriter.java Mon Jun 11 15:55:44 2012
@@ -45,7 +45,9 @@ import org.apache.lucene.util.IOUtils;
* @lucene.experimental
*/
public final class Lucene40PostingsWriter extends PostingsWriterBase {
- final static String CODEC = "Lucene40PostingsWriter";
+ final static String TERMS_CODEC = "Lucene40PostingsWriterTerms";
+ final static String FRQ_CODEC = "Lucene40PostingsWriterFrq";
+ final static String PRX_CODEC = "Lucene40PostingsWriterPrx";
//private static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
@@ -102,7 +104,9 @@ public final class Lucene40PostingsWrite
String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION);
freqOut = state.directory.createOutput(fileName, state.context);
boolean success = false;
+ IndexOutput proxOut = null;
try {
+ CodecUtil.writeHeader(freqOut, FRQ_CODEC, VERSION_CURRENT);
// TODO: this is a best effort, if one of these fields has no postings
// then we make an empty prx file, same as if we are wrapped in
// per-field postingsformat. maybe... we shouldn't
@@ -112,14 +116,16 @@ public final class Lucene40PostingsWrite
// prox file
fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION);
proxOut = state.directory.createOutput(fileName, state.context);
+ CodecUtil.writeHeader(proxOut, PRX_CODEC, VERSION_CURRENT);
} else {
// Every field omits TF so we will write no prox file
proxOut = null;
}
+ this.proxOut = proxOut;
success = true;
} finally {
if (!success) {
- IOUtils.closeWhileHandlingException(freqOut);
+ IOUtils.closeWhileHandlingException(freqOut, proxOut);
}
}
@@ -135,7 +141,7 @@ public final class Lucene40PostingsWrite
@Override
public void start(IndexOutput termsOut) throws IOException {
this.termsOut = termsOut;
- CodecUtil.writeHeader(termsOut, CODEC, VERSION_CURRENT);
+ CodecUtil.writeHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);
termsOut.writeInt(skipInterval); // write skipInterval
termsOut.writeInt(maxSkipLevels); // write maxSkipLevels
termsOut.writeInt(skipMinimum); // write skipMinimum
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java?rev=1348919&r1=1348918&r2=1348919&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/codecs/pulsing/PulsingPostingsFormat.java Mon Jun 11 15:55:44 2012
@@ -29,6 +29,7 @@ import org.apache.lucene.codecs.Postings
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.IOUtils;
/** This postings format "inlines" the postings for terms that have
* low docFreq. It wraps another postings format, which is used for
@@ -65,33 +66,39 @@ public abstract class PulsingPostingsFor
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- PostingsWriterBase docsWriter = wrappedPostingsBaseFormat.postingsWriterBase(state);
+ PostingsWriterBase docsWriter = null;
// Terms that have <= freqCutoff number of docs are
// "pulsed" (inlined):
- PostingsWriterBase pulsingWriter = new PulsingPostingsWriter(freqCutoff, docsWriter);
+ PostingsWriterBase pulsingWriter = null;
// Terms dict
boolean success = false;
try {
+ docsWriter = wrappedPostingsBaseFormat.postingsWriterBase(state);
+
+ // Terms that have <= freqCutoff number of docs are
+ // "pulsed" (inlined):
+ pulsingWriter = new PulsingPostingsWriter(freqCutoff, docsWriter);
FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter, minBlockSize, maxBlockSize);
success = true;
return ret;
} finally {
if (!success) {
- pulsingWriter.close();
+ IOUtils.closeWhileHandlingException(docsWriter, pulsingWriter);
}
}
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
-
- PostingsReaderBase docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
- PostingsReaderBase pulsingReader = new PulsingPostingsReader(docsReader);
+ PostingsReaderBase docsReader = null;
+ PostingsReaderBase pulsingReader = null;
boolean success = false;
try {
+ docsReader = wrappedPostingsBaseFormat.postingsReaderBase(state);
+ pulsingReader = new PulsingPostingsReader(docsReader);
FieldsProducer ret = new BlockTreeTermsReader(
state.dir, state.fieldInfos, state.segmentInfo.name,
pulsingReader,
@@ -102,7 +109,7 @@ public abstract class PulsingPostingsFor
return ret;
} finally {
if (!success) {
- pulsingReader.close();
+ IOUtils.closeWhileHandlingException(docsReader, pulsingReader);
}
}
}
Added: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestAllFilesHaveCodecHeader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestAllFilesHaveCodecHeader.java?rev=1348919&view=auto
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestAllFilesHaveCodecHeader.java (added)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestAllFilesHaveCodecHeader.java Mon Jun 11 15:55:44 2012
@@ -0,0 +1,98 @@
+package org.apache.lucene.codecs.lucene40;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.store.CompoundFileDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.CodecUtil;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+/**
+ * Test that a plain Lucene40Codec puts codec headers in all files.
+ */
+public class TestAllFilesHaveCodecHeader extends LuceneTestCase {
+ public void test() throws Exception {
+ Directory dir = newDirectory();
+ IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ conf.setCodec(Codec.forName("Lucene40"));
+ // riw should sometimes create docvalues fields, etc
+ RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
+ Document doc = new Document();
+ // these fields should sometimes get term vectors, etc
+ Field idField = newStringField("id", "", Field.Store.NO);
+ Field bodyField = newTextField("body", "", Field.Store.NO);
+ doc.add(idField);
+ doc.add(bodyField);
+ for (int i = 0; i < 100; i++) {
+ idField.setStringValue(Integer.toString(i));
+ bodyField.setStringValue(_TestUtil.randomUnicodeString(random()));
+ riw.addDocument(doc);
+ if (random().nextInt(7) == 0) {
+ riw.commit();
+ }
+ }
+ riw.close();
+ checkHeaders(dir);
+ dir.close();
+ }
+
+ private void checkHeaders(Directory dir) throws IOException {
+ for (String file : dir.listAll()) {
+ if (file.equals(IndexFileNames.SEGMENTS_GEN)) {
+ continue; // segments.gen has no header, that's ok
+ }
+ if (file.endsWith(IndexFileNames.COMPOUND_FILE_EXTENSION)) {
+ /* TODO: enable this after resolving LUCENE-4130
+ * CompoundFileDirectory cfsDir = new CompoundFileDirectory(dir, file, newIOContext(random()), false);
+ * checkHeaders(cfsDir); // recurse into cfs
+ * cfsDir.close();
+ */
+ continue; // .cfs has its own header... would be nice to fix
+ }
+ if (file.endsWith(IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)) {
+ continue; // .cfe has its own header... would be nice to fix
+ }
+ IndexInput in = null;
+ boolean success = false;
+ try {
+ in = dir.openInput(file, newIOContext(random()));
+ int val = in.readInt();
+ assertEquals(file + " has no codec header, instead found: " + val, CodecUtil.CODEC_MAGIC, val);
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(in);
+ } else {
+ IOUtils.closeWhileHandlingException(in);
+ }
+ }
+ }
+ }
+}
Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java?rev=1348919&r1=1348918&r2=1348919&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/codecs/nestedpulsing/NestedPulsingPostingsFormat.java Mon Jun 11 15:55:44 2012
@@ -32,6 +32,7 @@ import org.apache.lucene.codecs.pulsing.
import org.apache.lucene.codecs.pulsing.PulsingPostingsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.util.IOUtils;
/**
* Pulsing(1, Pulsing(2, Lucene40))
@@ -47,32 +48,38 @@ public class NestedPulsingPostingsFormat
@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
- PostingsWriterBase docsWriter = new Lucene40PostingsWriter(state);
-
- PostingsWriterBase pulsingWriterInner = new PulsingPostingsWriter(2, docsWriter);
- PostingsWriterBase pulsingWriter = new PulsingPostingsWriter(1, pulsingWriterInner);
+ PostingsWriterBase docsWriter = null;
+ PostingsWriterBase pulsingWriterInner = null;
+ PostingsWriterBase pulsingWriter = null;
// Terms dict
boolean success = false;
try {
+ docsWriter = new Lucene40PostingsWriter(state);
+
+ pulsingWriterInner = new PulsingPostingsWriter(2, docsWriter);
+ pulsingWriter = new PulsingPostingsWriter(1, pulsingWriterInner);
FieldsConsumer ret = new BlockTreeTermsWriter(state, pulsingWriter,
BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
success = true;
return ret;
} finally {
if (!success) {
- pulsingWriter.close();
+ IOUtils.closeWhileHandlingException(docsWriter, pulsingWriterInner, pulsingWriter);
}
}
}
@Override
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
- PostingsReaderBase docsReader = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
- PostingsReaderBase pulsingReaderInner = new PulsingPostingsReader(docsReader);
- PostingsReaderBase pulsingReader = new PulsingPostingsReader(pulsingReaderInner);
+ PostingsReaderBase docsReader = null;
+ PostingsReaderBase pulsingReaderInner = null;
+ PostingsReaderBase pulsingReader = null;
boolean success = false;
try {
+ docsReader = new Lucene40PostingsReader(state.dir, state.fieldInfos, state.segmentInfo, state.context, state.segmentSuffix);
+ pulsingReaderInner = new PulsingPostingsReader(docsReader);
+ pulsingReader = new PulsingPostingsReader(pulsingReaderInner);
FieldsProducer ret = new BlockTreeTermsReader(
state.dir, state.fieldInfos, state.segmentInfo.name,
pulsingReader,
@@ -83,7 +90,7 @@ public class NestedPulsingPostingsFormat
return ret;
} finally {
if (!success) {
- pulsingReader.close();
+ IOUtils.closeWhileHandlingException(docsReader, pulsingReaderInner, pulsingReader);
}
}
}