You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/11/01 17:53:43 UTC
svn commit: r1196125 - in /lucene/dev/branches/lucene2621/lucene/src:
java/org/apache/lucene/index/codecs/pulsing/ resources/META-INF/services/
test-framework/java/org/apache/lucene/index/ test/org/apache/lucene/
test/org/apache/lucene/index/ test/org/...
Author: rmuir
Date: Tue Nov 1 16:53:43 2011
New Revision: 1196125
URL: http://svn.apache.org/viewvc?rev=1196125&view=rev
Log:
LUCENE-3490: fix pulsing
Added:
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/pulsing/Pulsing40PostingsFormat.java (with props)
Modified:
lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsFormat.java
lucene/dev/branches/lucene2621/lucene/src/resources/META-INF/services/org.apache.lucene.index.codecs.PostingsFormat
lucene/dev/branches/lucene2621/lucene/src/test-framework/java/org/apache/lucene/index/RandomCodec.java
lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/codecs/pulsing/Test10KPulsings.java
lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/codecs/pulsing/TestPulsingReuse.java
Added: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/pulsing/Pulsing40PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/pulsing/Pulsing40PostingsFormat.java?rev=1196125&view=auto
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/pulsing/Pulsing40PostingsFormat.java (added)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/pulsing/Pulsing40PostingsFormat.java Tue Nov 1 16:53:43 2011
@@ -0,0 +1,42 @@
+package org.apache.lucene.index.codecs.pulsing;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.codecs.BlockTreeTermsWriter;
+import org.apache.lucene.index.codecs.lucene40.Lucene40PostingsBaseFormat;
+
+/**
+ * @lucene.experimental
+ */
+public class Pulsing40PostingsFormat extends PulsingPostingsFormat {
+
+ /** Inlines docFreq=1 terms (a freqCutoff of 1), otherwise uses the normal "Lucene40" format. */
+ public Pulsing40PostingsFormat() {
+ this(1);
+ }
+
+ /** Inlines terms with docFreq &lt;= <code>freqCutoff</code> using the default min/max block sizes, otherwise uses the normal "Lucene40" format. */
+ public Pulsing40PostingsFormat(int freqCutoff) {
+ this(freqCutoff, BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
+ }
+
+ /** Inlines terms with docFreq &lt;= <code>freqCutoff</code> using the given min/max block sizes, otherwise uses the normal "Lucene40" format. */
+ public Pulsing40PostingsFormat(int freqCutoff, int minBlockSize, int maxBlockSize) {
+ super("Pulsing40", new Lucene40PostingsBaseFormat(), freqCutoff, minBlockSize, maxBlockSize);
+ }
+}
Modified: lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsFormat.java?rev=1196125&r1=1196124&r2=1196125&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsFormat.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsFormat.java Tue Nov 1 16:53:43 2011
@@ -31,40 +31,29 @@ import org.apache.lucene.index.codecs.Po
import org.apache.lucene.index.codecs.PostingsFormat;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
-import org.apache.lucene.index.codecs.lucene40.Lucene40PostingsBaseFormat;
import org.apache.lucene.store.Directory;
/** This postings format "inlines" the postings for terms that have
* low docFreq. It wraps another postings format, which is used for
* writing the non-inlined terms.
*
- * Currently in only inlines docFreq=1 terms, and
- * otherwise uses the normal "Lucene40" format.
* @lucene.experimental */
-// nocommit: this should be abstract, and we should have concrete PulsingStandard
-// this way its written into the index (the format name) what the wrapped format is.
-// otherwise, pulsing will not be able to be read!@
-public class PulsingPostingsFormat extends PostingsFormat {
+public abstract class PulsingPostingsFormat extends PostingsFormat {
private final int freqCutoff;
private final int minBlockSize;
private final int maxBlockSize;
private final PostingsBaseFormat wrappedPostingsBaseFormat;
- // nocommit: maybe create subclass with Lucene40 postings, e.g. Lucene40PulsingPostingsFormat -- se above comment
- public PulsingPostingsFormat() {
- this(new Lucene40PostingsBaseFormat(), 1);
- }
-
- public PulsingPostingsFormat(PostingsBaseFormat wrappedPostingsBaseFormat, int freqCutoff) {
- this(wrappedPostingsBaseFormat, freqCutoff, BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
+ public PulsingPostingsFormat(String name, PostingsBaseFormat wrappedPostingsBaseFormat, int freqCutoff) {
+ this(name, wrappedPostingsBaseFormat, freqCutoff, BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE, BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
}
/** Terms with freq <= freqCutoff are inlined into terms
* dict. */
- public PulsingPostingsFormat(PostingsBaseFormat wrappedPostingsBaseFormat, int freqCutoff, int minBlockSize, int maxBlockSize) {
- super("Pulsing");
+ public PulsingPostingsFormat(String name, PostingsBaseFormat wrappedPostingsBaseFormat, int freqCutoff, int minBlockSize, int maxBlockSize) {
+ super(name);
this.freqCutoff = freqCutoff;
this.minBlockSize = minBlockSize;
assert minBlockSize > 1;
Modified: lucene/dev/branches/lucene2621/lucene/src/resources/META-INF/services/org.apache.lucene.index.codecs.PostingsFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/resources/META-INF/services/org.apache.lucene.index.codecs.PostingsFormat?rev=1196125&r1=1196124&r2=1196125&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/resources/META-INF/services/org.apache.lucene.index.codecs.PostingsFormat (original)
+++ lucene/dev/branches/lucene2621/lucene/src/resources/META-INF/services/org.apache.lucene.index.codecs.PostingsFormat Tue Nov 1 16:53:43 2011
@@ -14,6 +14,6 @@
# limitations under the License.
org.apache.lucene.index.codecs.lucene40.Lucene40PostingsFormat
-org.apache.lucene.index.codecs.pulsing.PulsingPostingsFormat
+org.apache.lucene.index.codecs.pulsing.Pulsing40PostingsFormat
org.apache.lucene.index.codecs.simpletext.SimpleTextPostingsFormat
org.apache.lucene.index.codecs.memory.MemoryPostingsFormat
Modified: lucene/dev/branches/lucene2621/lucene/src/test-framework/java/org/apache/lucene/index/RandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/test-framework/java/org/apache/lucene/index/RandomCodec.java?rev=1196125&r1=1196124&r2=1196125&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/test-framework/java/org/apache/lucene/index/RandomCodec.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/test-framework/java/org/apache/lucene/index/RandomCodec.java Tue Nov 1 16:53:43 2011
@@ -34,6 +34,7 @@ import org.apache.lucene.index.codecs.mo
import org.apache.lucene.index.codecs.mockintblock.MockVariableIntBlockPostingsFormat;
import org.apache.lucene.index.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.index.codecs.mocksep.MockSepPostingsFormat;
+import org.apache.lucene.index.codecs.pulsing.Pulsing40PostingsFormat;
import org.apache.lucene.index.codecs.pulsing.PulsingPostingsFormat;
import org.apache.lucene.index.codecs.simpletext.SimpleTextPostingsFormat;
import org.apache.lucene.util._TestUtil;
@@ -88,7 +89,7 @@ public class RandomCodec extends Lucene4
// block via CL:
minItemsPerBlock = _TestUtil.nextInt(random, 2, 100);
maxItemsPerBlock = 2*(Math.max(1, minItemsPerBlock-1)) + random.nextInt(100);
- register(new PulsingPostingsFormat(new Lucene40PostingsBaseFormat(), 1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock));
+ register(new Pulsing40PostingsFormat(1 + random.nextInt(20), minItemsPerBlock, maxItemsPerBlock));
register(new MockSepPostingsFormat());
register(new MockFixedIntBlockPostingsFormat(_TestUtil.nextInt(random, 1, 2000)));
register(new MockVariableIntBlockPostingsFormat( _TestUtil.nextInt(random, 1, 127)));
Modified: lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/TestExternalCodecs.java?rev=1196125&r1=1196124&r2=1196125&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/TestExternalCodecs.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/TestExternalCodecs.java Tue Nov 1 16:53:43 2011
@@ -27,6 +27,7 @@ import org.apache.lucene.index.codecs.*;
import org.apache.lucene.index.codecs.lucene40.Lucene40PostingsBaseFormat;
import org.apache.lucene.index.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.index.codecs.perfield.PerFieldPostingsFormat;
+import org.apache.lucene.index.codecs.pulsing.Pulsing40PostingsFormat;
import org.apache.lucene.index.codecs.pulsing.PulsingPostingsFormat;
import org.apache.lucene.search.*;
import org.apache.lucene.store.*;
@@ -513,7 +514,7 @@ public class TestExternalCodecs extends
private static class CustomPerFieldPostingsFormat extends PerFieldPostingsFormat {
private final PostingsFormat ramFormat = new RAMOnlyPostingsFormat();
private final PostingsFormat defaultFormat = new Lucene40PostingsFormat();
- private final PostingsFormat pulsingFormat = new PulsingPostingsFormat(new Lucene40PostingsBaseFormat(), 1);
+ private final PostingsFormat pulsingFormat = new Pulsing40PostingsFormat(1);
@Override
public PostingsFormat getPostingsFormatForField(String field) {
Modified: lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java?rev=1196125&r1=1196124&r2=1196125&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java Tue Nov 1 16:53:43 2011
@@ -35,6 +35,7 @@ import org.apache.lucene.index.codecs.lu
import org.apache.lucene.index.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.index.codecs.mocksep.MockSepPostingsFormat;
import org.apache.lucene.index.codecs.perfield.PerFieldPostingsFormat;
+import org.apache.lucene.index.codecs.pulsing.Pulsing40PostingsFormat;
import org.apache.lucene.index.codecs.pulsing.PulsingPostingsFormat;
import org.apache.lucene.index.codecs.simpletext.SimpleTextPostingsFormat;
import org.apache.lucene.search.DocIdSetIterator;
@@ -1169,7 +1170,7 @@ public class TestAddIndexes extends Luce
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random));
- conf.setCodec(_TestUtil.alwaysFormat(new PulsingPostingsFormat(new Lucene40PostingsBaseFormat(), 1 + random.nextInt(20))));
+ conf.setCodec(_TestUtil.alwaysFormat(new Pulsing40PostingsFormat(1 + random.nextInt(20))));
IndexWriter w = new IndexWriter(dir, conf);
try {
w.addIndexes(toAdd);
@@ -1188,7 +1189,7 @@ public class TestAddIndexes extends Luce
Directory dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random));
- conf.setCodec(_TestUtil.alwaysFormat(new PulsingPostingsFormat(new Lucene40PostingsBaseFormat(), 1 + random.nextInt(20))));
+ conf.setCodec(_TestUtil.alwaysFormat(new Pulsing40PostingsFormat(1 + random.nextInt(20))));
IndexWriter w = new IndexWriter(dir, conf);
IndexReader indexReader = IndexReader.open(toAdd);
try {
Modified: lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/codecs/pulsing/Test10KPulsings.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/codecs/pulsing/Test10KPulsings.java?rev=1196125&r1=1196124&r2=1196125&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/codecs/pulsing/Test10KPulsings.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/codecs/pulsing/Test10KPulsings.java Tue Nov 1 16:53:43 2011
@@ -54,7 +54,7 @@ import org.junit.Ignore;
public class Test10KPulsings extends LuceneTestCase {
public void test10kPulsed() throws Exception {
// we always run this test with pulsing codec.
- Codec cp = _TestUtil.alwaysFormat(new PulsingPostingsFormat(new Lucene40PostingsBaseFormat(), 1));
+ Codec cp = _TestUtil.alwaysFormat(new Pulsing40PostingsFormat(1));
File f = _TestUtil.getTempDir("10kpulsed");
MockDirectoryWrapper dir = newFSDirectory(f);
@@ -105,7 +105,7 @@ public class Test10KPulsings extends Luc
public void test10kNotPulsed() throws Exception {
// we always run this test with pulsing codec.
int freqCutoff = _TestUtil.nextInt(random, 1, 10);
- Codec cp = _TestUtil.alwaysFormat(new PulsingPostingsFormat(new Lucene40PostingsBaseFormat(), freqCutoff));
+ Codec cp = _TestUtil.alwaysFormat(new Pulsing40PostingsFormat(freqCutoff));
File f = _TestUtil.getTempDir("10knotpulsed");
MockDirectoryWrapper dir = newFSDirectory(f);
Modified: lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/codecs/pulsing/TestPulsingReuse.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/codecs/pulsing/TestPulsingReuse.java?rev=1196125&r1=1196124&r2=1196125&view=diff
==============================================================================
--- lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/codecs/pulsing/TestPulsingReuse.java (original)
+++ lucene/dev/branches/lucene2621/lucene/src/test/org/apache/lucene/index/codecs/pulsing/TestPulsingReuse.java Tue Nov 1 16:53:43 2011
@@ -43,7 +43,6 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.PostingsFormat;
import org.apache.lucene.index.codecs.PostingsReaderBase;
import org.apache.lucene.index.codecs.PostingsWriterBase;
-import org.apache.lucene.index.codecs.lucene40.Lucene40PostingsBaseFormat;
import org.apache.lucene.index.codecs.lucene40.Lucene40PostingsReader;
import org.apache.lucene.index.codecs.lucene40.Lucene40PostingsWriter;
import org.apache.lucene.store.Directory;
@@ -60,7 +59,7 @@ public class TestPulsingReuse extends Lu
// TODO: this is a basic test. this thing is complicated, add more
public void testSophisticatedReuse() throws Exception {
// we always run this test with pulsing codec.
- Codec cp = _TestUtil.alwaysFormat(new PulsingPostingsFormat(new Lucene40PostingsBaseFormat(), 1));
+ Codec cp = _TestUtil.alwaysFormat(new Pulsing40PostingsFormat(1));
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random, dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodec(cp));