You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/02/09 02:04:13 UTC
svn commit: r1068718 [11/21] - in /lucene/dev/branches/bulkpostings: ./
dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/
dev-tools/maven/ dev-tools/maven/lucene/ dev-tools/maven/lucene/contrib/ant/
dev-tools/maven/lucene/contrib/db/bdb-j...
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java Wed Feb 9 01:03:49 2011
@@ -20,6 +20,7 @@ package org.apache.lucene.search.spans;
import java.util.HashSet;
import java.util.Set;
+import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@@ -54,7 +55,7 @@ public class TestFieldMaskingSpanQuery e
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
- RandomIndexWriter writer= new RandomIndexWriter(random, directory);
+ RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
writer.addDocument(doc(new Field[] { field("id", "0")
,
@@ -111,7 +112,7 @@ public class TestFieldMaskingSpanQuery e
field("last", "jones") }));
reader = writer.getReader();
writer.close();
- searcher = new IndexSearcher(reader);
+ searcher = newSearcher(reader);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java Wed Feb 9 01:03:49 2011
@@ -57,7 +57,7 @@ public class TestNearSpansOrdered extend
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
- RandomIndexWriter writer= new RandomIndexWriter(random, directory);
+ RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
for (int i = 0; i < docFields.length; i++) {
Document doc = new Document();
doc.add(newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
@@ -65,7 +65,7 @@ public class TestNearSpansOrdered extend
}
reader = writer.getReader();
writer.close();
- searcher = new IndexSearcher(reader);
+ searcher = newSearcher(reader);
}
protected String[] docFields = {
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java Wed Feb 9 01:03:49 2011
@@ -178,6 +178,7 @@ public class TestPayloadSpans extends Lu
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 2, new int[]{3,3});
+ searcher.close();
closeIndexReader.close();
directory.close();
}
@@ -210,6 +211,7 @@ public class TestPayloadSpans extends Lu
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 1, new int[]{3});
+ searcher.close();
closeIndexReader.close();
directory.close();
}
@@ -247,6 +249,7 @@ public class TestPayloadSpans extends Lu
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), nestedSpanNearQuery);
assertTrue("spans is null and it shouldn't be", spans != null);
checkSpans(spans, 2, new int[]{8, 8});
+ searcher.close();
closeIndexReader.close();
directory.close();
}
@@ -262,7 +265,7 @@ public class TestPayloadSpans extends Lu
writer.addDocument(doc);
IndexReader reader = writer.getReader();
- IndexSearcher is = new IndexSearcher(reader);
+ IndexSearcher is = newSearcher(reader);
writer.close();
SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
@@ -285,6 +288,7 @@ public class TestPayloadSpans extends Lu
assertEquals(2, payloadSet.size());
assertTrue(payloadSet.contains("a:Noise:10"));
assertTrue(payloadSet.contains("k:Noise:11"));
+ is.close();
reader.close();
directory.close();
}
@@ -299,7 +303,7 @@ public class TestPayloadSpans extends Lu
doc.add(new Field("content", new StringReader("a b a d k f a h i k a k")));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
- IndexSearcher is = new IndexSearcher(reader);
+ IndexSearcher is = newSearcher(reader);
writer.close();
SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
@@ -321,6 +325,7 @@ public class TestPayloadSpans extends Lu
assertEquals(2, payloadSet.size());
assertTrue(payloadSet.contains("a:Noise:10"));
assertTrue(payloadSet.contains("k:Noise:11"));
+ is.close();
reader.close();
directory.close();
}
@@ -335,7 +340,7 @@ public class TestPayloadSpans extends Lu
doc.add(new Field("content", new StringReader("j k a l f k k p a t a k l k t a")));
writer.addDocument(doc);
IndexReader reader = writer.getReader();
- IndexSearcher is = new IndexSearcher(reader);
+ IndexSearcher is = newSearcher(reader);
writer.close();
SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
@@ -363,6 +368,7 @@ public class TestPayloadSpans extends Lu
}
assertTrue(payloadSet.contains("a:Noise:10"));
assertTrue(payloadSet.contains("k:Noise:11"));
+ is.close();
reader.close();
directory.close();
}
@@ -378,7 +384,7 @@ public class TestPayloadSpans extends Lu
IndexReader reader = writer.getReader();
writer.close();
- IndexSearcher searcher = new IndexSearcher(reader);
+ IndexSearcher searcher = newSearcher(reader);
PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
@@ -389,6 +395,7 @@ public class TestPayloadSpans extends Lu
if(VERBOSE)
System.out.println(new String(bytes));
}
+ searcher.close();
reader.close();
directory.close();
}
@@ -443,7 +450,7 @@ public class TestPayloadSpans extends Lu
closeIndexReader = writer.getReader();
writer.close();
- IndexSearcher searcher = new IndexSearcher(closeIndexReader);
+ IndexSearcher searcher = newSearcher(closeIndexReader);
return searcher;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java Wed Feb 9 01:03:49 2011
@@ -48,7 +48,7 @@ public class TestSpanFirstQuery extends
writer.addDocument(doc2);
IndexReader reader = writer.getReader();
- IndexSearcher searcher = new IndexSearcher(reader);
+ IndexSearcher searcher = newSearcher(reader);
// user queries on "starts-with quick"
SpanQuery sfq = new SpanFirstQuery(new SpanTermQuery(new Term("field", "quick")), 1);
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java Wed Feb 9 01:03:49 2011
@@ -53,7 +53,7 @@ public class TestSpanMultiTermQueryWrapp
iw.addDocument(doc);
reader = iw.getReader();
iw.close();
- searcher = new IndexSearcher(reader);
+ searcher = newSearcher(reader);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java Wed Feb 9 01:03:49 2011
@@ -53,7 +53,7 @@ public class TestSpans extends LuceneTes
public void setUp() throws Exception {
super.setUp();
directory = newDirectory();
- RandomIndexWriter writer= new RandomIndexWriter(random, directory);
+ RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
for (int i = 0; i < docFields.length; i++) {
Document doc = new Document();
doc.add(newField(field, docFields[i], Field.Store.YES, Field.Index.ANALYZED));
@@ -61,7 +61,7 @@ public class TestSpans extends LuceneTes
}
reader = writer.getReader();
writer.close();
- searcher = new IndexSearcher(reader);
+ searcher = newSearcher(reader);
}
@Override
@@ -486,7 +486,7 @@ public class TestSpans extends LuceneTes
// Get searcher
final IndexReader reader = IndexReader.open(dir, true);
- final IndexSearcher searcher = new IndexSearcher(reader);
+ final IndexSearcher searcher = newSearcher(reader);
// Control (make sure docs indexed)
assertEquals(2, hitCount(searcher, "the"));
@@ -499,6 +499,7 @@ public class TestSpans extends LuceneTes
searcher.search(createSpan(0, true,
new SpanQuery[] {createSpan(4, false, "chased", "cat"),
createSpan("ate")}), 10).totalHits);
+ searcher.close();
reader.close();
dir.close();
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java Wed Feb 9 01:03:49 2011
@@ -57,15 +57,16 @@ public class TestSpansAdvanced extends L
// create test index
mDirectory = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random,
- mDirectory, new MockAnalyzer(MockTokenizer.SIMPLE, true,
- MockTokenFilter.ENGLISH_STOPSET, true));
+ mDirectory, newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(MockTokenizer.SIMPLE, true,
+ MockTokenFilter.ENGLISH_STOPSET, true)).setMergePolicy(newInOrderLogMergePolicy()));
addDocument(writer, "1", "I think it should work.");
addDocument(writer, "2", "I think it should work.");
addDocument(writer, "3", "I think it should work.");
addDocument(writer, "4", "I think it should work.");
reader = writer.getReader();
writer.close();
- searcher = new IndexSearcher(reader);
+ searcher = newSearcher(reader);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced2.java Wed Feb 9 01:03:49 2011
@@ -48,7 +48,7 @@ public class TestSpansAdvanced2 extends
final RandomIndexWriter writer = new RandomIndexWriter(random, mDirectory,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(
MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true))
- .setOpenMode(OpenMode.APPEND));
+ .setOpenMode(OpenMode.APPEND).setMergePolicy(newInOrderLogMergePolicy()));
addDocument(writer, "A", "Should we, could we, would we?");
addDocument(writer, "B", "It should. Should it?");
addDocument(writer, "C", "It shouldn't.");
@@ -57,7 +57,7 @@ public class TestSpansAdvanced2 extends
writer.close();
// re-open the searcher since we added more docs
- searcher2 = new IndexSearcher(reader2);
+ searcher2 = newSearcher(reader2);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java Wed Feb 9 01:03:49 2011
@@ -271,7 +271,7 @@ public class TestBufferedIndexInput exte
assertEquals(reader.docFreq(bbb), 37);
dir.tweakBufferSizes();
- IndexSearcher searcher = new IndexSearcher(reader);
+ IndexSearcher searcher = newSearcher(reader);
ScoreDoc[] hits = searcher.search(new TermQuery(bbb), null, 1000).scoreDocs;
dir.tweakBufferSizes();
assertEquals(35, hits.length);
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestFileSwitchDirectory.java Wed Feb 9 01:03:49 2011
@@ -39,8 +39,10 @@ public class TestFileSwitchDirectory ext
fileExtensions.add(IndexFileNames.FIELDS_EXTENSION);
fileExtensions.add(IndexFileNames.FIELDS_INDEX_EXTENSION);
- Directory primaryDir = new MockDirectoryWrapper(random, new RAMDirectory());
- Directory secondaryDir = new MockDirectoryWrapper(random, new RAMDirectory());
+ MockDirectoryWrapper primaryDir = new MockDirectoryWrapper(random, new RAMDirectory());
+ primaryDir.setCheckIndexOnClose(false); // only part of an index
+ MockDirectoryWrapper secondaryDir = new MockDirectoryWrapper(random, new RAMDirectory());
+ secondaryDir.setCheckIndexOnClose(false); // only part of an index
FileSwitchDirectory fsd = new FileSwitchDirectory(fileExtensions, primaryDir, secondaryDir, true);
IndexWriter writer = new IndexWriter(
@@ -49,7 +51,7 @@ public class TestFileSwitchDirectory ext
setMergePolicy(newLogMergePolicy(false))
);
TestIndexWriterReader.createIndexNoClose(true, "ram", writer);
- IndexReader reader = IndexReader.open(writer);
+ IndexReader reader = IndexReader.open(writer, true);
assertEquals(100, reader.maxDoc());
writer.commit();
// we should see only fdx,fdt files here
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java Wed Feb 9 01:03:49 2011
@@ -20,6 +20,7 @@ package org.apache.lucene.store;
import java.io.File;
import java.util.Random;
+import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@@ -58,7 +59,7 @@ public class TestMultiMMap extends Lucen
// we will map a lot, try to turn on the unmap hack
if (MMapDirectory.UNMAP_SUPPORTED)
dir.setUseUnmap(true);
- RandomIndexWriter writer = new RandomIndexWriter(random, dir);
+ RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer()).setMergePolicy(newInOrderLogMergePolicy()));
Document doc = new Document();
Field docid = newField("docid", "0", Field.Store.YES, Field.Index.NOT_ANALYZED);
Field junk = newField("junk", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java Wed Feb 9 01:03:49 2011
@@ -82,7 +82,7 @@ public class TestRAMDirectory extends Lu
assertEquals(docsToAdd, reader.numDocs());
// open search zo check if all doc's are there
- IndexSearcher searcher = new IndexSearcher(reader);
+ IndexSearcher searcher = newSearcher(reader);
// search for all documents
for (int i = 0; i < docsToAdd; i++) {
@@ -139,18 +139,6 @@ public class TestRAMDirectory extends Lu
writer.close();
}
-
- public void testSerializable() throws IOException {
- Directory dir = new RAMDirectory();
- ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
- assertEquals("initially empty", 0, bos.size());
- ObjectOutput out = new ObjectOutputStream(bos);
- int headerSize = bos.size();
- out.writeObject(dir);
- out.close();
- assertTrue("contains more then just header", headerSize < bos.size());
- }
-
@Override
public void tearDown() throws Exception {
// cleanup
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestBytesRefHash.java Wed Feb 9 01:03:49 2011
@@ -40,6 +40,7 @@ public class TestBytesRefHash extends Lu
/**
*/
+ @Override
@Before
public void setUp() throws Exception {
super.setUp();
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestDoubleBarrelLRUCache.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestDoubleBarrelLRUCache.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestDoubleBarrelLRUCache.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestDoubleBarrelLRUCache.java Wed Feb 9 01:03:49 2011
@@ -145,14 +145,17 @@ public class TestDoubleBarrelLRUCache ex
this.value = value;
}
+ @Override
public boolean equals(Object other) {
return this.value.equals(((CloneableObject) other).value);
}
+ @Override
public int hashCode() {
return value.hashCode();
}
+ @Override
public Object clone() {
return new CloneableObject(value);
}
@@ -165,14 +168,17 @@ public class TestDoubleBarrelLRUCache ex
this.value = value;
}
+ @Override
public boolean equals(Object other) {
return this.value.equals(((CloneableInteger) other).value);
}
+ @Override
public int hashCode() {
return value.hashCode();
}
+ @Override
public Object clone() {
return new CloneableInteger(value);
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestRecyclingByteBlockAllocator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestRecyclingByteBlockAllocator.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestRecyclingByteBlockAllocator.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/TestRecyclingByteBlockAllocator.java Wed Feb 9 01:03:49 2011
@@ -31,6 +31,7 @@ public class TestRecyclingByteBlockAlloc
/**
*/
+ @Override
@Before
public void setUp() throws Exception {
super.setUp();
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java Wed Feb 9 01:03:49 2011
@@ -59,11 +59,13 @@ public class TestFSTs extends LuceneTest
private MockDirectoryWrapper dir;
+ @Override
public void setUp() throws IOException {
dir = newDirectory();
dir.setPreventDoubleWrite(false);
}
+ @Override
public void tearDown() throws IOException {
dir.close();
}
@@ -958,7 +960,7 @@ public class TestFSTs extends LuceneTest
writer.addDocument(doc);
docCount++;
}
- IndexReader r = IndexReader.open(writer);
+ IndexReader r = IndexReader.open(writer, true);
writer.close();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(random.nextBoolean());
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, outputs);
Modified: lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/CHANGES.txt Wed Feb 9 01:03:49 2011
@@ -80,6 +80,9 @@ New Features
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
+ * SOLR-1057: Add PathHierarchyTokenizer that represents file path hierarchies as synonyms of
+ /something, /something/something, /something/something/else. (Ryan McKinley, Koji Sekiguchi)
+
Build
* LUCENE-2413: All analyzers in contrib/analyzers and contrib/icu were moved to the
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/build.xml (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/build.xml Wed Feb 9 01:03:49 2011
@@ -30,6 +30,7 @@
<path id="test.classpath">
<path refid="classpath"/>
+ <pathelement location="../../../lucene/build/classes/test-framework"/>
<pathelement location="../../../lucene/build/classes/test/"/>
<path refid="junit-path"/>
<pathelement location="${build.dir}/classes/java"/>
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java Wed Feb 9 01:03:49 2011
@@ -82,7 +82,7 @@ public final class BrazilianAnalyzer ext
private Set<?> excltable = Collections.emptySet();
/**
- * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}).
+ * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
*/
public BrazilianAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java Wed Feb 9 01:03:49 2011
@@ -101,6 +101,7 @@ public class HTMLStripCharFilter extends
if (len>0) {
return pushed.charAt(len-1);
}
+ numRead++;
int ch = input.read();
push(ch);
return ch;
@@ -672,6 +673,7 @@ public class HTMLStripCharFilter extends
+ @Override
public int read() throws IOException {
// TODO: Do we ever want to preserve CDATA sections?
// where do we have to worry about them?
@@ -740,6 +742,7 @@ public class HTMLStripCharFilter extends
}
+ @Override
public int read(char cbuf[], int off, int len) throws IOException {
int i=0;
for (i=0; i<len; i++) {
@@ -754,6 +757,7 @@ public class HTMLStripCharFilter extends
return i;
}
+ @Override
public void close() throws IOException {
input.close();
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java Wed Feb 9 01:03:49 2011
@@ -65,7 +65,7 @@ public final class CJKAnalyzer extends S
}
/**
- * Builds an analyzer which removes words in {@link #STOP_WORDS}.
+ * Builds an analyzer which removes words in {@link #getDefaultStopSet()}.
*/
public CJKAnalyzer(Version matchVersion) {
this(matchVersion, DefaultSetHolder.DEFAULT_STOP_SET);
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java Wed Feb 9 01:03:49 2011
@@ -116,6 +116,7 @@ public final class CommonGramsFilter ext
* eliminate the middle bigram "of-the"and save a disk seek and a whole set of
* position lookups.
*/
+ @Override
public boolean incrementToken() throws IOException {
// get the next piece of input
if (savedState != null) {
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsQueryFilter.java Wed Feb 9 01:03:49 2011
@@ -62,6 +62,7 @@ public final class CommonGramsQueryFilte
/**
* {@inheritDoc}
*/
+ @Override
public void reset() throws IOException {
super.reset();
previous = null;
@@ -76,6 +77,7 @@ public final class CommonGramsQueryFilte
* <li>output:"the-rain", "rain-in" ,"in-spain", "falls", "mainly"
* </ul>
*/
+ @Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
State current = captureState();
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/ByteVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/ByteVector.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/ByteVector.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/ByteVector.java Wed Feb 9 01:03:49 2011
@@ -18,14 +18,12 @@
package org.apache.lucene.analysis.compound.hyphenation;
-import java.io.Serializable;
-
/**
* This class implements a simple byte vector with access to the underlying
* array.
* This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
*/
-public class ByteVector implements Serializable {
+public class ByteVector {
/**
* Capacity increment size
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/CharVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/CharVector.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/CharVector.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/CharVector.java Wed Feb 9 01:03:49 2011
@@ -17,15 +17,13 @@
package org.apache.lucene.analysis.compound.hyphenation;
-import java.io.Serializable;
-
/**
* This class implements a simple char vector with access to the underlying
* array.
*
* This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
*/
-public class CharVector implements Cloneable, Serializable {
+public class CharVector implements Cloneable {
/**
* Capacity increment size
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/Hyphen.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/Hyphen.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/Hyphen.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/Hyphen.java Wed Feb 9 01:03:49 2011
@@ -17,8 +17,6 @@
package org.apache.lucene.analysis.compound.hyphenation;
-import java.io.Serializable;
-
/**
* This class represents a hyphen. A 'full' hyphen is made of 3 parts: the
* pre-break text, post-break text and no-break. If no line-break is generated
@@ -32,7 +30,7 @@ import java.io.Serializable;
* This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
*/
-public class Hyphen implements Serializable {
+public class Hyphen {
public String preBreak;
public String noBreak;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/HyphenationTree.java Wed Feb 9 01:03:49 2011
@@ -18,7 +18,6 @@
package org.apache.lucene.analysis.compound.hyphenation;
import java.io.File;
-import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.HashMap;
@@ -31,10 +30,7 @@ import org.xml.sax.InputSource;
*
* This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
*/
-public class HyphenationTree extends TernaryTree implements PatternConsumer,
- Serializable {
-
- private static final long serialVersionUID = -7842107987915665573L;
+public class HyphenationTree extends TernaryTree implements PatternConsumer {
/**
* value space: stores the interletter values
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/TernaryTree.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/TernaryTree.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/TernaryTree.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/hyphenation/TernaryTree.java Wed Feb 9 01:03:49 2011
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.compo
import java.util.Enumeration;
import java.util.Stack;
-import java.io.Serializable;
/**
* <h2>Ternary Search Tree.</h2>
@@ -63,7 +62,7 @@ import java.io.Serializable;
* This class has been taken from the Apache FOP project (http://xmlgraphics.apache.org/fop/). They have been slightly modified.
*/
-public class TernaryTree implements Cloneable, Serializable {
+public class TernaryTree implements Cloneable {
/**
* We use 4 arrays to represent a node. I guess I should have created a proper
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java Wed Feb 9 01:03:49 2011
@@ -22,10 +22,9 @@ import java.util.Arrays;
import java.util.List;
import java.util.Set;
-import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.util.Version;
@@ -42,14 +41,10 @@ import org.apache.lucene.util.Version;
* increments are preserved
* </ul>
*/
-public final class StopFilter extends TokenFilter {
+public final class StopFilter extends FilteringTokenFilter {
private final CharArraySet stopWords;
- private boolean enablePositionIncrements = true;
-
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-
/**
* Construct a token stream filtering the given input. If
@@ -75,7 +70,7 @@ public final class StopFilter extends To
*/
public StopFilter(Version matchVersion, TokenStream input, Set<?> stopWords, boolean ignoreCase)
{
- super(input);
+ super(true, input);
this.stopWords = stopWords instanceof CharArraySet ? (CharArraySet) stopWords : new CharArraySet(matchVersion, stopWords, ignoreCase);
}
@@ -157,48 +152,8 @@ public final class StopFilter extends To
* Returns the next input Token whose term() is not a stop word.
*/
@Override
- public final boolean incrementToken() throws IOException {
- // return the first non-stop word found
- int skippedPositions = 0;
- while (input.incrementToken()) {
- if (!stopWords.contains(termAtt.buffer(), 0, termAtt.length())) {
- if (enablePositionIncrements) {
- posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
- }
- return true;
- }
- skippedPositions += posIncrAtt.getPositionIncrement();
- }
- // reached EOS -- return false
- return false;
- }
-
- /**
- * @see #setEnablePositionIncrements(boolean)
- */
- public boolean getEnablePositionIncrements() {
- return enablePositionIncrements;
+ protected boolean accept() throws IOException {
+ return !stopWords.contains(termAtt.buffer(), 0, termAtt.length());
}
- /**
- * If <code>true</code>, this StopFilter will preserve
- * positions of the incoming tokens (ie, accumulate and
- * set position increments of the removed stop tokens).
- * Generally, <code>true</code> is best as it does not
- * lose information (positions of the original tokens)
- * during indexing.
- *
- * Default is true.
- *
- * <p> When set, when a token is stopped
- * (omitted), the position increment of the following
- * token is incremented.
- *
- * <p> <b>NOTE</b>: be sure to also
- * set {@link QueryParser#setEnablePositionIncrements} if
- * you use QueryParser to create queries.
- */
- public void setEnablePositionIncrements(boolean enable) {
- this.enablePositionIncrements = enable;
- }
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java Wed Feb 9 01:03:49 2011
@@ -86,7 +86,7 @@ public final class CzechAnalyzer extends
private final Set<?> stemExclusionTable;
/**
- * Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}).
+ * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()}).
*
* @param matchVersion Lucene version to match See
* {@link <a href="#version">above</a>}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianCharFilter.java Wed Feb 9 01:03:49 2011
@@ -32,6 +32,7 @@ public class PersianCharFilter extends C
super(in);
}
+ @Override
public int read(char[] cbuf, int off, int len) throws IOException {
final int charsRead = super.read(cbuf, off, len);
if (charsRead > 0) {
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java Wed Feb 9 01:03:49 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
@@ -30,22 +31,19 @@ import org.apache.lucene.analysis.util.C
*
* @since solr 1.3
*/
-public final class KeepWordFilter extends TokenFilter {
+public final class KeepWordFilter extends FilteringTokenFilter {
private final CharArraySet words;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/** The words set passed to this constructor will be directly used by this filter
* and should not be modified, */
- public KeepWordFilter(TokenStream in, CharArraySet words) {
- super(in);
+ public KeepWordFilter(boolean enablePositionIncrements, TokenStream in, CharArraySet words) {
+ super(enablePositionIncrements, in);
this.words = words;
}
@Override
- public boolean incrementToken() throws IOException {
- while (input.incrementToken()) {
- if (words.contains(termAtt.buffer(), 0, termAtt.length())) return true;
- }
- return false;
+ public boolean accept() throws IOException {
+ return words.contains(termAtt.buffer(), 0, termAtt.length());
}
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilter.java Wed Feb 9 01:03:49 2011
@@ -74,10 +74,12 @@ public final class KeywordMarkerFilter e
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- keywordAttr.setKeyword(keywordSet.contains(termAtt.buffer(), 0,
- termAtt.length()));
+ if (keywordSet.contains(termAtt.buffer(), 0, termAtt.length())) {
+ keywordAttr.setKeyword(true);
+ }
return true;
- } else
+ } else {
return false;
+ }
}
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java Wed Feb 9 01:03:49 2011
@@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
@@ -29,7 +30,7 @@ import org.apache.lucene.analysis.tokena
* Note: Length is calculated as the number of UTF-16 code units.
* </p>
*/
-public final class LengthFilter extends TokenFilter {
+public final class LengthFilter extends FilteringTokenFilter {
private final int min;
private final int max;
@@ -40,27 +41,15 @@ public final class LengthFilter extends
* Build a filter that removes words that are too long or too
* short from the text.
*/
- public LengthFilter(TokenStream in, int min, int max)
- {
- super(in);
+ public LengthFilter(boolean enablePositionIncrements, TokenStream in, int min, int max) {
+ super(enablePositionIncrements, in);
this.min = min;
this.max = max;
}
- /**
- * Returns the next input Token whose term() is the right len
- */
@Override
- public final boolean incrementToken() throws IOException {
- // return the first non-stop word found
- while (input.incrementToken()) {
- int len = termAtt.length();
- if (len >= min && len <= max) {
- return true;
- }
- // note: else we ignore it but should we index each part of it?
- }
- // reached EOS -- return false
- return false;
+ public boolean accept() throws IOException {
+ final int len = termAtt.length();
+ return (len >= min && len <= max);
}
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java Wed Feb 9 01:03:49 2011
@@ -109,7 +109,7 @@ public final class DutchAnalyzer extends
private final Version matchVersion;
/**
- * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS})
+ * Builds an analyzer with the default stop words ({@link #getDefaultStopSet()})
* and a few default entries for the stem exclusion table.
*
*/
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternReplaceCharFilter.java Wed Feb 9 01:03:49 2011
@@ -113,6 +113,7 @@ public class PatternReplaceCharFilter ex
}
}
+ @Override
public int read() throws IOException {
while( prepareReplaceBlock() ){
return replaceBlockBuffer.charAt( replaceBlockBufferOffset++ );
@@ -120,6 +121,7 @@ public class PatternReplaceCharFilter ex
return -1;
}
+ @Override
public int read(char[] cbuf, int off, int len) throws IOException {
char[] tmp = new char[len];
int l = input.read(tmp, 0, len);
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java Wed Feb 9 01:03:49 2011
@@ -78,6 +78,7 @@ public class SynonymMap {
}
+ @Override
public String toString() {
StringBuilder sb = new StringBuilder("<");
if (synonyms!=null) {
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/charfilter/HTMLStripCharFilterTest.java Wed Feb 9 01:03:49 2011
@@ -169,7 +169,7 @@ public class HTMLStripCharFilterTest ext
public void testBufferOverflow() throws Exception {
StringBuilder testBuilder = new StringBuilder(HTMLStripCharFilter.DEFAULT_READ_AHEAD + 50);
- testBuilder.append("ah<?> ");
+ testBuilder.append("ah<?> ??????");
appendChars(testBuilder, HTMLStripCharFilter.DEFAULT_READ_AHEAD + 500);
processBuffer(testBuilder.toString(), "Failed on pseudo proc. instr.");//processing instructions
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/el/TestGreekStemmer.java Wed Feb 9 01:03:49 2011
@@ -1,5 +1,22 @@
package org.apache.lucene.analysis.el;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java Wed Feb 9 01:03:49 2011
@@ -35,16 +35,26 @@ public class TestKeepWordFilter extends
words.add( "aaa" );
words.add( "bbb" );
- String input = "aaa BBB ccc ddd EEE";
+ String input = "xxx yyy aaa zzz BBB ccc ddd EEE";
// Test Stopwords
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
- stream = new KeepWordFilter(stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
- assertTokenStreamContents(stream, new String[] { "aaa", "BBB" });
+ stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
+ assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 3, 2 });
// Now force case
stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
- stream = new KeepWordFilter(stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
- assertTokenStreamContents(stream, new String[] { "aaa" });
+ stream = new KeepWordFilter(true, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
+ assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 3 });
+
+ // Test Stopwords
+ stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT, words, true));
+ assertTokenStreamContents(stream, new String[] { "aaa", "BBB" }, new int[] { 1, 1 });
+
+ // Now force case
+ stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(input));
+ stream = new KeepWordFilter(false, stream, new CharArraySet(TEST_VERSION_CURRENT,words, false));
+ assertTokenStreamContents(stream, new String[] { "aaa" }, new int[] { 1 });
}
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java Wed Feb 9 01:03:49 2011
@@ -2,6 +2,7 @@ package org.apache.lucene.analysis.misce
import java.io.IOException;
import java.io.StringReader;
+import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
@@ -57,6 +58,19 @@ public class TestKeywordMarkerFilter ext
"The quIck browN LuceneFox Jumps")), set2)), output);
}
+ // LUCENE-2901
+ public void testComposition() throws Exception {
+ TokenStream ts = new LowerCaseFilterMock(
+ new KeywordMarkerFilter(
+ new KeywordMarkerFilter(
+ new WhitespaceTokenizer(TEST_VERSION_CURRENT,
+ new StringReader("Dogs Trees Birds Houses")),
+ new HashSet<String>(Arrays.asList(new String[] { "Birds", "Houses" }))),
+ new HashSet<String>(Arrays.asList(new String[] { "Dogs", "Trees" }))));
+
+ assertTokenStreamContents(ts, new String[] { "Dogs", "Trees", "Birds", "Houses" });
+ }
+
public static final class LowerCaseFilterMock extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestLengthFilter.java Wed Feb 9 01:03:49 2011
@@ -24,19 +24,24 @@ import java.io.StringReader;
public class TestLengthFilter extends BaseTokenStreamTestCase {
- public void testFilter() throws Exception {
+ public void testFilterNoPosIncr() throws Exception {
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
- LengthFilter filter = new LengthFilter(stream, 2, 6);
- CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
+ LengthFilter filter = new LengthFilter(false, stream, 2, 6);
+ assertTokenStreamContents(filter,
+ new String[]{"short", "ab", "foo"},
+ new int[]{1, 1, 1}
+ );
+ }
- assertTrue(filter.incrementToken());
- assertEquals("short", termAtt.toString());
- assertTrue(filter.incrementToken());
- assertEquals("ab", termAtt.toString());
- assertTrue(filter.incrementToken());
- assertEquals("foo", termAtt.toString());
- assertFalse(filter.incrementToken());
+ public void testFilterWithPosIncr() throws Exception {
+ TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
+ new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
+ LengthFilter filter = new LengthFilter(true, stream, 2, 6);
+ assertTokenStreamContents(filter,
+ new String[]{"short", "ab", "foo"},
+ new int[]{1, 4, 2}
+ );
}
}
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java Wed Feb 9 01:03:49 2011
@@ -47,6 +47,7 @@ public class TestRemoveDuplicatesTokenFi
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+ @Override
public boolean incrementToken() {
if (toks.hasNext()) {
clearAttributes();
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java Wed Feb 9 01:03:49 2011
@@ -87,6 +87,7 @@ public class TestTrimFilter extends Base
this(tokens.toArray(new Token[tokens.size()]));
}
+ @Override
public boolean incrementToken() throws IOException {
if (index >= tokens.length)
return false;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java Wed Feb 9 01:03:49 2011
@@ -213,6 +213,7 @@ public class TestWordDelimiterFilter ext
/* analyzer that uses whitespace + wdf */
Analyzer a = new Analyzer() {
+ @Override
public TokenStream tokenStream(String field, Reader reader) {
return new WordDelimiterFilter(
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader),
@@ -239,6 +240,7 @@ public class TestWordDelimiterFilter ext
/* analyzer that will consume tokens with large position increments */
Analyzer a2 = new Analyzer() {
+ @Override
public TokenStream tokenStream(String field, Reader reader) {
return new WordDelimiterFilter(
new LargePosIncTokenFilter(
@@ -271,6 +273,7 @@ public class TestWordDelimiterFilter ext
new int[] { 1, 11, 1 });
Analyzer a3 = new Analyzer() {
+ @Override
public TokenStream tokenStream(String field, Reader reader) {
StopFilter filter = new StopFilter(TEST_VERSION_CURRENT,
new WhitespaceTokenizer(TEST_VERSION_CURRENT, reader), StandardAnalyzer.STOP_WORDS_SET);
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java Wed Feb 9 01:03:49 2011
@@ -76,7 +76,10 @@ public class QueryAutoStopWordAnalyzerTe
private int search(Analyzer a, String queryString) throws IOException, ParseException {
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "repetitiveField", a);
Query q = qp.parse(queryString);
- return new IndexSearcher(reader).search(q, null, 1000).totalHits;
+ IndexSearcher searcher = newSearcher(reader);
+ int hits = searcher.search(q, null, 1000).totalHits;
+ searcher.close();
+ return hits;
}
public void testUninitializedAnalyzer() throws Exception {
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java Wed Feb 9 01:03:49 2011
@@ -395,6 +395,7 @@ public class TestSynonymFilter extends B
this(tokens.toArray(new Token[tokens.size()]));
}
+ @Override
public boolean incrementToken() throws IOException {
if (index >= tokens.length)
return false;
Modified: lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/common/src/test/org/apache/lucene/collation/CollationTestBase.java Wed Feb 9 01:03:49 2011
@@ -141,7 +141,7 @@ public abstract class CollationTestBase
writer.close();
IndexReader reader = IndexReader.open(farsiIndex, true);
- IndexSearcher search = new IndexSearcher(reader);
+ IndexSearcher search = newSearcher(reader);
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
// orders the U+0698 character before the U+0633 character, so the single
Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/build.xml (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/build.xml Wed Feb 9 01:03:49 2011
@@ -49,6 +49,7 @@
<path id="test.classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="classpath"/>
+ <pathelement location="../../../lucene/build/classes/test-framework/"/>
<pathelement location="../../../lucene/build/classes/test/"/>
<pathelement location="../build/common/classes/test/"/>
<path refid="junit-path"/>
Modified: lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/icu/src/java/org/apache/lucene/analysis/icu/tokenattributes/ScriptAttributeImpl.java Wed Feb 9 01:03:49 2011
@@ -17,8 +17,6 @@ package org.apache.lucene.analysis.icu.t
* limitations under the License.
*/
-import java.io.Serializable;
-
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
@@ -29,7 +27,7 @@ import com.ibm.icu.lang.UScript;
* as an integer.
* @lucene.experimental
*/
-public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribute, Cloneable, Serializable {
+public class ScriptAttributeImpl extends AttributeImpl implements ScriptAttribute, Cloneable {
private int code = UScript.COMMON;
public int getCode() {
Modified: lucene/dev/branches/bulkpostings/modules/analysis/phonetic/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/phonetic/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/phonetic/build.xml (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/phonetic/build.xml Wed Feb 9 01:03:49 2011
@@ -48,6 +48,7 @@
<path id="test.classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="classpath"/>
+ <pathelement location="../../../lucene/build/classes/test-framework/"/>
<pathelement location="../../../lucene/build/classes/test/"/>
<pathelement location="../build/common/classes/test/"/>
<path refid="junit-path"/>
Modified: lucene/dev/branches/bulkpostings/modules/analysis/smartcn/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/smartcn/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/smartcn/build.xml (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/smartcn/build.xml Wed Feb 9 01:03:49 2011
@@ -39,6 +39,7 @@
<path id="test.classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="classpath"/>
+ <pathelement location="../../../lucene/build/classes/test-framework"/>
<pathelement location="../../../lucene/build/classes/test/"/>
<path refid="junit-path"/>
<pathelement location="${build.dir}/classes/java"/>
Modified: lucene/dev/branches/bulkpostings/modules/analysis/stempel/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/analysis/stempel/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/analysis/stempel/build.xml (original)
+++ lucene/dev/branches/bulkpostings/modules/analysis/stempel/build.xml Wed Feb 9 01:03:49 2011
@@ -38,6 +38,7 @@
<path id="test.classpath">
<path refid="classpath"/>
+ <pathelement location="../../../lucene/build/classes/test-framework"/>
<pathelement location="../../../lucene/build/classes/test/"/>
<path refid="junit-path"/>
<pathelement location="${build.dir}/classes/java"/>
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/CHANGES.txt?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/CHANGES.txt (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/CHANGES.txt Wed Feb 9 01:03:49 2011
@@ -2,6 +2,17 @@ Lucene Benchmark Contrib Change Log
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
+02/05/2011
+ LUCENE-1540: Improvements to contrib.benchmark for TREC collections.
+ ContentSource can now process plain text files, gzip files, and bzip2 files.
+ TREC doc parsing now handles the TREC gov2 collection and TREC disks 4&5-CR
+ collection (both used by many TREC tasks). (Shai Erera, Doron Cohen)
+
+01/26/2011
+ LUCENE-929: ExtractReuters first extracts to a tmp dir and then renames. That
+ way, if a previous extract attempt failed, "ant extract-reuters" will still
+ extract the files. (Shai Erera, Doron Cohen, Grant Ingersoll)
+
01/24/2011
LUCENE-2885: Add WaitForMerges task (calls IndexWriter.waitForMerges()).
(Mike McCandless)
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/build.xml?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/build.xml (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/build.xml Wed Feb 9 01:03:49 2011
@@ -87,7 +87,6 @@
</target>
<target name="extract-reuters" depends="check-files" unless="reuters.extracted">
- <mkdir dir="${working.dir}/reuters-out"/>
<java classname="org.apache.lucene.benchmark.utils.ExtractReuters" maxmemory="1024M" fork="true">
<classpath refid="run.classpath"/>
<arg file="${working.dir}/reuters"/>
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/conf/createLineFile.alg
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/conf/createLineFile.alg?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/conf/createLineFile.alg (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/conf/createLineFile.alg Wed Feb 9 01:03:49 2011
@@ -29,10 +29,14 @@
#
# Where to get documents from:
-content.source=org.apache.lucene.benchmark.byTask.feeds.ReutersContentSource
+content.source=org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource
# Where to write the line file output:
-line.file.out=work/reuters.lines.txt
+line.file.out=/x/tmp/enwiki.out.txt
+
+docs.file=/x/lucene/data/enwiki/enwiki-20110115-pages-articles.xml
+
+keep.image.only.docs = false
# Stop after processing the document feed once:
content.source.forever=false
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/ContentSource.java Wed Feb 9 01:03:49 2011
@@ -56,11 +56,14 @@ import org.apache.lucene.benchmark.byTas
public abstract class ContentSource {
private static final int BZIP = 0;
- private static final int OTHER = 1;
+ private static final int GZIP = 1;
+ private static final int OTHER = 2;
private static final Map<String,Integer> extensionToType = new HashMap<String,Integer>();
static {
extensionToType.put(".bz2", Integer.valueOf(BZIP));
extensionToType.put(".bzip", Integer.valueOf(BZIP));
+ extensionToType.put(".gz", Integer.valueOf(GZIP));
+ extensionToType.put(".gzip", Integer.valueOf(GZIP));
}
protected static final int BUFFER_SIZE = 1 << 16; // 64K
@@ -78,11 +81,13 @@ public abstract class ContentSource {
private CompressorStreamFactory csFactory = new CompressorStreamFactory();
+ /** update count of bytes generated by this source */
protected final synchronized void addBytes(long numBytes) {
bytesCount += numBytes;
totalBytesCount += numBytes;
}
+ /** update count of documents generated by this source */
protected final synchronized void addDoc() {
++docsCount;
++totalDocsCount;
@@ -130,21 +135,25 @@ public abstract class ContentSource {
type = typeInt.intValue();
}
}
- switch (type) {
- case BZIP:
- try {
+
+ try {
+ switch (type) {
+ case BZIP:
// According to BZip2CompressorInputStream's code, it reads the first
// two file header chars ('B' and 'Z'). It is important to wrap the
// underlying input stream with a buffered one since
// Bzip2CompressorInputStream uses the read() method exclusively.
is = csFactory.createCompressorInputStream("bzip2", is);
- } catch (CompressorException e) {
- IOException ioe = new IOException(e.getMessage());
- ioe.initCause(e);
- throw ioe;
- }
- break;
- default: // Do nothing, stay with FileInputStream
+ break;
+ case GZIP:
+ is = csFactory.createCompressorInputStream("gz", is);
+ break;
+ default: // Do nothing, stay with FileInputStream
+ }
+ } catch (CompressorException e) {
+ IOException ioe = new IOException(e.getMessage());
+ ioe.initCause(e);
+ throw ioe;
}
return is;
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DemoHTMLParser.java Wed Feb 9 01:03:49 2011
@@ -29,11 +29,14 @@ import java.util.Properties;
*/
public class DemoHTMLParser implements org.apache.lucene.benchmark.byTask.feeds.HTMLParser {
- public DocData parse(DocData docData, String name, Date date, Reader reader, DateFormat dateFormat) throws IOException, InterruptedException {
+ public DocData parse(DocData docData, String name, Date date, String title, Reader reader, DateFormat dateFormat) throws IOException, InterruptedException {
org.apache.lucene.demo.html.HTMLParser p = new org.apache.lucene.demo.html.HTMLParser(reader);
// title
- String title = p.getTitle();
+ if (title==null) {
+ title = p.getTitle();
+ }
+
// properties
Properties props = p.getMetaTags();
// body
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/HTMLParser.java Wed Feb 9 01:03:49 2011
@@ -29,16 +29,18 @@ public interface HTMLParser {
/**
* Parse the input Reader and return DocData.
- * A provided name or date is used for the result, otherwise an attempt is
- * made to set them from the parsed data.
- * @param dateFormat date formatter to use for extracting the date.
- * @param name name of the result doc data. If null, attempt to set by parsed data.
+ * The provided name, title and date are used for the result unless they are null,
+ * in which case an attempt is made to set them from the parsed data.
+ * @param docData result reused
+ * @param name name of the result doc data.
* @param date date of the result doc data. If null, attempt to set by parsed data.
- * @param reader of html text to parse.
+ * @param title title of the result doc data. If null, attempt to set by parsed data.
+ * @param reader reader of html text to parse.
+ * @param dateFormat date formatter to use for extracting the date.
* @return Parsed doc data.
* @throws IOException
* @throws InterruptedException
*/
- public DocData parse(DocData docData, String name, Date date, Reader reader, DateFormat dateFormat) throws IOException, InterruptedException;
+ public DocData parse(DocData docData, String name, Date date, String title, Reader reader, DateFormat dateFormat) throws IOException, InterruptedException;
}
Modified: lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java?rev=1068718&r1=1068717&r2=1068718&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java (original)
+++ lucene/dev/branches/bulkpostings/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishQueryMaker.java Wed Feb 9 01:03:49 2011
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.lucene.benchmark.byTask.feeds;
import org.apache.lucene.analysis.Analyzer;