You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/13 03:09:56 UTC
svn commit: r1058390 [3/16] - in /lucene/dev/branches/bulkpostings: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/
dev-tools/idea/.idea/libraries/ dev-tools/idea/lucene/
dev-tools/idea/lucene/contrib/ dev-tools/idea/lucene/contr...
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/geohash/GeoHashDistanceFilter.java Thu Jan 13 02:09:33 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.spatial.geohas
import java.io.IOException;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.DocTerms;
import org.apache.lucene.search.Filter;
@@ -62,15 +62,15 @@ public class GeoHashDistanceFilter exten
}
@Override
- public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
- final DocTerms geoHashValues = FieldCache.DEFAULT.getTerms(reader, geoHashField);
+ final DocTerms geoHashValues = FieldCache.DEFAULT.getTerms(context.reader, geoHashField);
final BytesRef br = new BytesRef();
final int docBase = nextDocBase;
- nextDocBase += reader.maxDoc();
+ nextDocBase += context.reader.maxDoc();
- return new FilteredDocIdSet(startingFilter.getDocIdSet(reader)) {
+ return new FilteredDocIdSet(startingFilter.getDocIdSet(context)) {
@Override
public boolean match(int doc) {
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/CartesianShapeFilter.java Thu Jan 13 02:09:33 2011
@@ -20,7 +20,7 @@ import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
@@ -45,8 +45,8 @@ public class CartesianShapeFilter extend
}
@Override
- public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
- final Bits delDocs = reader.getDeletedDocs();
+ public DocIdSet getDocIdSet(final AtomicReaderContext context) throws IOException {
+ final Bits delDocs = context.reader.getDeletedDocs();
final List<Double> area = shape.getArea();
final int sz = area.size();
@@ -58,7 +58,7 @@ public class CartesianShapeFilter extend
return new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
- return reader.termDocsEnum(delDocs, fieldName, bytesRef);
+ return context.reader.termDocsEnum(delDocs, fieldName, bytesRef);
}
@Override
@@ -67,11 +67,11 @@ public class CartesianShapeFilter extend
}
};
} else {
- final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
+ final OpenBitSet bits = new OpenBitSet(context.reader.maxDoc());
for (int i =0; i< sz; i++) {
double boxId = area.get(i).doubleValue();
NumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(boxId), 0, bytesRef);
- final DocsEnum docsEnum = reader.termDocsEnum(delDocs, fieldName, bytesRef);
+ final DocsEnum docsEnum = context.reader.termDocsEnum(delDocs, fieldName, bytesRef);
if (docsEnum == null) continue;
// iterate through all documents
// which have this boxId
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/java/org/apache/lucene/spatial/tier/LatLongDistanceFilter.java Thu Jan 13 02:09:33 2011
@@ -18,7 +18,8 @@
package org.apache.lucene.spatial.tier;
import java.io.IOException;
-import org.apache.lucene.index.IndexReader;
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.FilteredDocIdSet;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.Filter;
@@ -64,15 +65,15 @@ public class LatLongDistanceFilter exten
}
@Override
- public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
- final double[] latIndex = FieldCache.DEFAULT.getDoubles(reader, latField);
- final double[] lngIndex = FieldCache.DEFAULT.getDoubles(reader, lngField);
+ final double[] latIndex = FieldCache.DEFAULT.getDoubles(context.reader, latField);
+ final double[] lngIndex = FieldCache.DEFAULT.getDoubles(context.reader, lngField);
final int docBase = nextDocBase;
- nextDocBase += reader.maxDoc();
+ nextDocBase += context.reader.maxDoc();
- return new FilteredDocIdSet(startingFilter.getDocIdSet(reader)) {
+ return new FilteredDocIdSet(startingFilter.getDocIdSet(context)) {
@Override
protected boolean match(int doc) {
double x = latIndex[doc];
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestCartesian.java Thu Jan 13 02:09:33 2011
@@ -252,7 +252,7 @@ public class TestCartesian extends Lucen
// Perform the search, using the term query, the serial chain filter, and the
// distance sort
- TopDocs hits = searcher.search(customScore.createWeight(searcher),null, 1000, sort);
+ TopDocs hits = searcher.search(customScore,null, 1000, sort);
int results = hits.totalHits;
ScoreDoc[] scoreDocs = hits.scoreDocs;
@@ -348,7 +348,7 @@ public class TestCartesian extends Lucen
// Perform the search, using the term query, the serial chain filter, and the
// distance sort
- TopDocs hits = searcher.search(customScore.createWeight(searcher),null, 1000, sort);
+ TopDocs hits = searcher.search(customScore,null, 1000, sort);
int results = hits.totalHits;
ScoreDoc[] scoreDocs = hits.scoreDocs;
@@ -444,7 +444,7 @@ public class TestCartesian extends Lucen
// Perform the search, using the term query, the serial chain filter, and the
// distance sort
- TopDocs hits = searcher.search(customScore.createWeight(searcher),null, 1000, sort);
+ TopDocs hits = searcher.search(customScore,null, 1000, sort);
int results = hits.totalHits;
ScoreDoc[] scoreDocs = hits.scoreDocs;
@@ -539,7 +539,7 @@ public class TestCartesian extends Lucen
// Perform the search, using the term query, the serial chain filter, and the
// distance sort
- TopDocs hits = searcher.search(customScore.createWeight(searcher),dq.getFilter(), 1000); //,sort);
+ TopDocs hits = searcher.search(customScore,dq.getFilter(), 1000); //,sort);
int results = hits.totalHits;
ScoreDoc[] scoreDocs = hits.scoreDocs;
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/spatial/src/test/org/apache/lucene/spatial/tier/TestDistance.java Thu Jan 13 02:09:33 2011
@@ -22,14 +22,17 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.store.Directory;
+
public class TestDistance extends LuceneTestCase {
private Directory directory;
@@ -100,9 +103,9 @@ public class TestDistance extends Lucene
LatLongDistanceFilter f = new LatLongDistanceFilter(new QueryWrapperFilter(new MatchAllDocsQuery()),
lat, lng, 1.0, latField, lngField);
- IndexReader[] readers = r.getSequentialSubReaders();
- for(int i=0;i<readers.length;i++) {
- f.getDocIdSet(readers[i]);
+ AtomicReaderContext[] leaves = ReaderUtil.leaves(r.getTopReaderContext());
+ for (int i = 0; i < leaves.length; i++) {
+ f.getDocIdSet(leaves[i]);
}
r.close();
}
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/SpellChecker.java Thu Jan 13 02:09:33 2011
@@ -18,8 +18,10 @@ package org.apache.lucene.search.spell;
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
+import java.util.List;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
@@ -30,6 +32,8 @@ import org.apache.lucene.index.IndexWrit
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
@@ -38,7 +42,10 @@ import org.apache.lucene.search.ScoreDoc
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.Version;
+import org.apache.lucene.util.VirtualMethod;
/**
* <p>
@@ -492,35 +499,64 @@ public class SpellChecker implements jav
* @param dict Dictionary to index
* @param mergeFactor mergeFactor to use when indexing
* @param ramMB the max amount or memory in MB to use
+ * @param optimize whether or not the spellcheck index should be optimized
* @throws AlreadyClosedException if the Spellchecker is already closed
* @throws IOException
*/
- public void indexDictionary(Dictionary dict, int mergeFactor, int ramMB) throws IOException {
+ public final void indexDictionary(Dictionary dict, int mergeFactor, int ramMB, boolean optimize) throws IOException {
synchronized (modifyCurrentIndexLock) {
ensureOpen();
final Directory dir = this.spellIndex;
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(mergeFactor);
+ IndexSearcher indexSearcher = obtainSearcher();
+ final List<TermsEnum> termsEnums = new ArrayList<TermsEnum>();
+
+ if (searcher.maxDoc() > 0) {
+ new ReaderUtil.Gather(searcher.getIndexReader()) {
+ @Override
+ protected void add(int base, IndexReader r) throws IOException {
+ Terms terms = r.terms(F_WORD);
+ if (terms != null)
+ termsEnums.add(terms.iterator());
+ }
+ }.run();
+ }
+
+ boolean isEmpty = termsEnums.isEmpty();
+
+ try {
+ Iterator<String> iter = dict.getWordsIterator();
+ BytesRef currentTerm = new BytesRef();
+
+ terms: while (iter.hasNext()) {
+ String word = iter.next();
+
+ int len = word.length();
+ if (len < 3) {
+ continue; // too short we bail but "too long" is fine...
+ }
- Iterator<String> iter = dict.getWordsIterator();
- while (iter.hasNext()) {
- String word = iter.next();
-
- int len = word.length();
- if (len < 3) {
- continue; // too short we bail but "too long" is fine...
- }
+ if (!isEmpty) {
+ // we have a non-empty index, check if the term exists
+ currentTerm.copy(word);
+ for (TermsEnum te : termsEnums) {
+ if (te.seek(currentTerm, false) == TermsEnum.SeekStatus.FOUND) {
+ continue terms;
+ }
+ }
+ }
- if (this.exist(word)) { // if the word already exist in the gramindex
- continue;
+ // ok index the word
+ Document doc = createDocument(word, getMin(len), getMax(len));
+ writer.addDocument(doc);
}
-
- // ok index the word
- Document doc = createDocument(word, getMin(len), getMax(len));
- writer.addDocument(doc);
+ } finally {
+ releaseSearcher(indexSearcher);
}
// close writer
- writer.optimize();
+ if (optimize)
+ writer.optimize();
writer.close();
// also re-open the spell index to see our own changes when the next suggestion
// is fetched:
@@ -531,10 +567,21 @@ public class SpellChecker implements jav
/**
* Indexes the data from the given {@link Dictionary}.
* @param dict the dictionary to index
+ * @param mergeFactor mergeFactor to use when indexing
+ * @param ramMB the max amount or memory in MB to use
+ * @throws IOException
+ */
+ public final void indexDictionary(Dictionary dict, int mergeFactor, int ramMB) throws IOException {
+ indexDictionary(dict, mergeFactor, ramMB, true);
+ }
+
+ /**
+ * Indexes the data from the given {@link Dictionary}.
+ * @param dict the dictionary to index
* @throws IOException
*/
- public void indexDictionary(Dictionary dict) throws IOException {
- indexDictionary(dict, 300, 10);
+ public final void indexDictionary(Dictionary dict) throws IOException {
+ indexDictionary(dict, 300, (int)IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
}
private static int getMin(int l) {
@@ -559,7 +606,12 @@ public class SpellChecker implements jav
private static Document createDocument(String text, int ng1, int ng2) {
Document doc = new Document();
- doc.add(new Field(F_WORD, text, Field.Store.YES, Field.Index.NOT_ANALYZED)); // orig term
+ // the word field is never queried on... its indexed so it can be quickly
+ // checked for rebuild (and stored for retrieval). Doesn't need norms or TF/pos
+ Field f = new Field(F_WORD, text, Field.Store.YES, Field.Index.NOT_ANALYZED);
+ f.setOmitTermFreqAndPositions(true);
+ f.setOmitNorms(true);
+ doc.add(f); // orig term
addGram(text, doc, ng1, ng2);
return doc;
}
@@ -573,12 +625,20 @@ public class SpellChecker implements jav
String gram = text.substring(i, i + ng);
doc.add(new Field(key, gram, Field.Store.NO, Field.Index.NOT_ANALYZED));
if (i == 0) {
- doc.add(new Field("start" + ng, gram, Field.Store.NO, Field.Index.NOT_ANALYZED));
+ // only one term possible in the startXXField, TF/pos and norms aren't needed.
+ Field startField = new Field("start" + ng, gram, Field.Store.NO, Field.Index.NOT_ANALYZED);
+ startField.setOmitTermFreqAndPositions(true);
+ startField.setOmitNorms(true);
+ doc.add(startField);
}
end = gram;
}
if (end != null) { // may not be present if len==ng1
- doc.add(new Field("end" + ng, end, Field.Store.NO, Field.Index.NOT_ANALYZED));
+ // only one term possible in the endXXField, TF/pos and norms aren't needed.
+ Field endField = new Field("end" + ng, end, Field.Store.NO, Field.Index.NOT_ANALYZED);
+ endField.setOmitTermFreqAndPositions(true);
+ endField.setOmitNorms(true);
+ doc.add(endField);
}
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java Thu Jan 13 02:09:33 2011
@@ -39,7 +39,6 @@ import org.apache.lucene.search.Collecto
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
@@ -104,7 +103,7 @@ public final class SynExpand {
* @return the expanded Query
*/
public static Query expand( String query,
- Searcher syns,
+ IndexSearcher syns,
Analyzer a,
String f,
final float boost)
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java Thu Jan 13 02:09:33 2011
@@ -39,7 +39,6 @@ import org.apache.lucene.search.IndexSea
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;
@@ -114,7 +113,7 @@ public class SynLookup {
* @param boost
*/
public static Query expand( String query,
- Searcher syns,
+ IndexSearcher syns,
Analyzer a,
final String field,
final float boost)
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java Thu Jan 13 02:09:33 2011
@@ -26,13 +26,12 @@ import org.apache.lucene.search.BooleanC
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
public class TestWordnet extends LuceneTestCase {
- private Searcher searcher;
+ private IndexSearcher searcher;
private Directory dir;
String storePathName = new File(TEMP_DIR,"testLuceneWordnet").getAbsolutePath();
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/NumericRangeFilterBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/NumericRangeFilterBuilder.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/NumericRangeFilterBuilder.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/NumericRangeFilterBuilder.java Thu Jan 13 02:09:33 2011
@@ -19,7 +19,7 @@ package org.apache.lucene.xmlparser.buil
import java.io.IOException;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.NumericRangeFilter;
@@ -157,7 +157,7 @@ public class NumericRangeFilterBuilder i
private static final long serialVersionUID = 1L;
@Override
- public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ public DocIdSet getDocIdSet(AtomicReaderContext context) throws IOException {
return null;
}
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/builders/TestNumericRangeFilterBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/builders/TestNumericRangeFilterBuilder.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/builders/TestNumericRangeFilterBuilder.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/builders/TestNumericRangeFilterBuilder.java Thu Jan 13 02:09:33 2011
@@ -28,7 +28,9 @@ import javax.xml.parsers.ParserConfigura
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.SlowMultiReaderWrapper;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.NumericRangeFilter;
import org.apache.lucene.store.Directory;
@@ -64,10 +66,10 @@ public class TestNumericRangeFilterBuild
writer.commit();
try
{
- IndexReader reader = IndexReader.open(ramDir, true);
+ IndexReader reader = new SlowMultiReaderWrapper(IndexReader.open(ramDir, true));
try
{
- assertNull(filter.getDocIdSet(reader));
+ assertNull(filter.getDocIdSet((AtomicReaderContext) reader.getTopReaderContext()));
}
finally
{
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/AbstractField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/AbstractField.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/AbstractField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/AbstractField.java Thu Jan 13 02:09:33 2011
@@ -99,7 +99,7 @@ public abstract class AbstractField impl
*
* <p>Note: this value is not stored directly with the document in the index.
* Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
- * {@link org.apache.lucene.search.Searcher#doc(int)} may thus not have the same value present as when
+ * {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when
* this field was indexed.
*
* @see #setBoost(float)
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Document.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Document.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Document.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Document.java Thu Jan 13 02:09:33 2011
@@ -18,8 +18,8 @@ package org.apache.lucene.document;
*/
import java.util.*; // for javadoc
+import org.apache.lucene.search.IndexSearcher; // for javadoc
import org.apache.lucene.search.ScoreDoc; // for javadoc
-import org.apache.lucene.search.Searcher; // for javadoc
import org.apache.lucene.index.IndexReader; // for javadoc
/** Documents are the unit of indexing and search.
@@ -32,8 +32,7 @@ import org.apache.lucene.index.IndexRead
*
* <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
* <i>not</i> available in documents retrieved from the index, e.g. with {@link
- * ScoreDoc#doc}, {@link Searcher#doc(int)} or {@link
- * IndexReader#document(int)}.
+ * ScoreDoc#doc} or {@link IndexReader#document(int)}.
*/
public final class Document implements java.io.Serializable {
@@ -167,7 +166,7 @@ public final class Document implements j
/** Returns a List of all the fields in a document.
* <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
* <i>not</i> available in documents retrieved from the
- * index, e.g. {@link Searcher#doc(int)} or {@link
+ * index, e.g. {@link IndexSearcher#doc(int)} or {@link
* IndexReader#document(int)}.
*/
public final List<Fieldable> getFields() {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Fieldable.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Fieldable.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Fieldable.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Fieldable.java Thu Jan 13 02:09:33 2011
@@ -64,7 +64,7 @@ public interface Fieldable extends Seria
*
* <p>Note: this value is not stored directly with the document in the index.
* Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
- * {@link org.apache.lucene.search.Searcher#doc(int)} may thus not have the same value present as when
+ * {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when
* this field was indexed.
*
* @see #setBoost(float)
@@ -206,7 +206,7 @@ public interface Fieldable extends Seria
* required in the index, it also means any query
* requiring positional information, such as {@link
* PhraseQuery} or {@link SpanQuery} subclasses will
- * silently fail to find results.
+ * fail with an exception.
*/
void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions);
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java Thu Jan 13 02:09:33 2011
@@ -26,6 +26,7 @@ import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
@@ -364,12 +365,15 @@ class BufferedDeletes {
// Delete by query
if (deletes.queries.size() > 0) {
IndexSearcher searcher = new IndexSearcher(reader);
+ assert searcher.getTopReaderContext().isAtomic;
+ final AtomicReaderContext readerContext = (AtomicReaderContext) searcher.getTopReaderContext();
try {
for (Entry<Query, Integer> entry : deletes.queries.entrySet()) {
Query query = entry.getKey();
int limit = entry.getValue().intValue();
Weight weight = query.weight(searcher);
- Scorer scorer = weight.scorer(reader, true, false);
+
+ Scorer scorer = weight.scorer(readerContext, true, false);
if (scorer != null) {
while(true) {
int doc = scorer.nextDoc();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DirectoryReader.java Thu Jan 13 02:09:33 2011
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
@@ -31,13 +30,11 @@ import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
@@ -62,9 +59,8 @@ class DirectoryReader extends IndexReade
private boolean rollbackHasChanges;
private SegmentReader[] subReaders;
+ private ReaderContext topLevelReaderContext;
private int[] starts; // 1st docno for each segment
- private final Map<SegmentReader,ReaderUtil.Slice> subReaderToSlice = new HashMap<SegmentReader,ReaderUtil.Slice>();
- private Map<String,byte[]> normsCache = new HashMap<String,byte[]>();
private int maxDoc = 0;
private int numDocs = -1;
private boolean hasDeletions = false;
@@ -186,7 +182,7 @@ class DirectoryReader extends IndexReade
/** This constructor is only used for {@link #reopen()} */
DirectoryReader(Directory directory, SegmentInfos infos, SegmentReader[] oldReaders, int[] oldStarts,
- Map<String,byte[]> oldNormsCache, boolean readOnly, boolean doClone, int termInfosIndexDivisor, CodecProvider codecs) throws IOException {
+ boolean readOnly, boolean doClone, int termInfosIndexDivisor, CodecProvider codecs) throws IOException {
this.directory = directory;
this.readOnly = readOnly;
this.segmentInfos = infos;
@@ -274,38 +270,6 @@ class DirectoryReader extends IndexReade
// initialize the readers to calculate maxDoc before we try to reuse the old normsCache
initialize(newReaders);
-
- // try to copy unchanged norms from the old normsCache to the new one
- if (oldNormsCache != null) {
- for (Map.Entry<String,byte[]> entry: oldNormsCache.entrySet()) {
- String field = entry.getKey();
- if (!hasNorms(field)) {
- continue;
- }
-
- byte[] oldBytes = entry.getValue();
-
- byte[] bytes = new byte[maxDoc()];
-
- for (int i = 0; i < subReaders.length; i++) {
- Integer oldReaderIndex = segmentReaders.get(subReaders[i].getSegmentName());
-
- // this SegmentReader was not re-opened, we can copy all of its norms
- if (oldReaderIndex != null &&
- (oldReaders[oldReaderIndex.intValue()] == subReaders[i]
- || oldReaders[oldReaderIndex.intValue()].norms.get(field) == subReaders[i].norms.get(field))) {
- // we don't have to synchronize here: either this constructor is called from a SegmentReader,
- // in which case no old norms cache is present, or it is called from MultiReader.reopen(),
- // which is synchronized
- System.arraycopy(oldBytes, oldStarts[oldReaderIndex.intValue()], bytes, starts[i], starts[i+1] - starts[i]);
- } else {
- subReaders[i].norms(field, bytes, starts[i]);
- }
- }
-
- normsCache.put(field, bytes); // update cache
- }
- }
}
/** {@inheritDoc} */
@@ -335,25 +299,22 @@ class DirectoryReader extends IndexReade
private void initialize(SegmentReader[] subReaders) throws IOException {
this.subReaders = subReaders;
starts = new int[subReaders.length + 1]; // build starts array
-
+ final AtomicReaderContext[] subReaderCtx = new AtomicReaderContext[subReaders.length];
+ topLevelReaderContext = new CompositeReaderContext(this, subReaderCtx, subReaderCtx);
final List<Fields> subFields = new ArrayList<Fields>();
- final List<ReaderUtil.Slice> fieldSlices = new ArrayList<ReaderUtil.Slice>();
-
+
for (int i = 0; i < subReaders.length; i++) {
starts[i] = maxDoc;
+ subReaderCtx[i] = new AtomicReaderContext(topLevelReaderContext, subReaders[i], i, maxDoc, i, maxDoc);
maxDoc += subReaders[i].maxDoc(); // compute maxDocs
if (subReaders[i].hasDeletions()) {
hasDeletions = true;
}
-
- final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i], subReaders[i].maxDoc(), i);
- subReaderToSlice.put(subReaders[i], slice);
-
+
final Fields f = subReaders[i].fields();
if (f != null) {
subFields.add(f);
- fieldSlices.add(slice);
}
}
starts[subReaders.length] = maxDoc;
@@ -497,7 +458,7 @@ class DirectoryReader extends IndexReade
private synchronized DirectoryReader doReopen(SegmentInfos infos, boolean doClone, boolean openReadOnly) throws CorruptIndexException, IOException {
DirectoryReader reader;
- reader = new DirectoryReader(directory, infos, subReaders, starts, normsCache, openReadOnly, doClone, termInfosIndexDivisor, codecs);
+ reader = new DirectoryReader(directory, infos, subReaders, starts, openReadOnly, doClone, termInfosIndexDivisor, codecs);
return reader;
}
@@ -637,41 +598,18 @@ class DirectoryReader extends IndexReade
@Override
public synchronized byte[] norms(String field) throws IOException {
ensureOpen();
- byte[] bytes = normsCache.get(field);
- if (bytes != null)
- return bytes; // cache hit
- if (!hasNorms(field))
- return null;
-
- bytes = new byte[maxDoc()];
- for (int i = 0; i < subReaders.length; i++)
- subReaders[i].norms(field, bytes, starts[i]);
- normsCache.put(field, bytes); // update cache
- return bytes;
+ throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms");
}
@Override
public synchronized void norms(String field, byte[] result, int offset)
throws IOException {
- ensureOpen();
- byte[] bytes = normsCache.get(field);
- if (bytes==null && !hasNorms(field)) {
- Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f));
- } else if (bytes != null) { // cache hit
- System.arraycopy(bytes, 0, result, offset, maxDoc());
- } else {
- for (int i = 0; i < subReaders.length; i++) { // read from segments
- subReaders[i].norms(field, result, offset + starts[i]);
- }
- }
+ throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms");
}
@Override
protected void doSetNorm(int n, String field, byte value)
throws CorruptIndexException, IOException {
- synchronized (normsCache) {
- normsCache.remove(field); // clear cache
- }
int i = readerIndex(n); // find segment num
subReaders[i].setNorm(n-starts[i], field, value); // dispatch
}
@@ -864,7 +802,6 @@ class DirectoryReader extends IndexReade
@Override
protected synchronized void doClose() throws IOException {
IOException ioe = null;
- normsCache = null;
for (int i = 0; i < subReaders.length; i++) {
// try to close each reader, even if an exception is thrown
try {
@@ -903,16 +840,16 @@ class DirectoryReader extends IndexReade
fieldSet.addAll(names);
}
return fieldSet;
- }
+ }
@Override
- public IndexReader[] getSequentialSubReaders() {
- return subReaders;
+ public ReaderContext getTopReaderContext() {
+ return topLevelReaderContext;
}
-
+
@Override
- public int getSubReaderDocBase(IndexReader subReader) {
- return subReaderToSlice.get(subReader).start;
+ public IndexReader[] getSequentialSubReaders() {
+ return subReaders;
}
/** Returns the directory this index resides in. */
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java Thu Jan 13 02:09:33 2011
@@ -64,7 +64,6 @@ final class DocFieldProcessor extends Do
// FieldInfo.storePayload.
final String fileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.FIELD_INFOS_EXTENSION);
fieldInfos.write(state.directory, fileName);
- state.flushedFiles.add(fileName);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Thu Jan 13 02:09:33 2011
@@ -599,7 +599,7 @@ final class DocumentsWriter {
if (infoStream != null) {
message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors"));
- message("flushedFiles=" + flushState.flushedFiles);
+ message("flushedFiles=" + newSegment.files());
message("flushed codecs=" + newSegment.getSegmentCodecs());
}
@@ -611,12 +611,11 @@ final class DocumentsWriter {
}
CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, cfsFileName);
- for(String fileName : flushState.flushedFiles) {
+ for(String fileName : newSegment.files()) {
cfsWriter.addFile(fileName);
}
cfsWriter.close();
- deleter.deleteNewFiles(flushState.flushedFiles);
-
+ deleter.deleteNewFiles(newSegment.files());
newSegment.setUseCompoundFile(true);
}
@@ -906,7 +905,8 @@ final class DocumentsWriter {
final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK;
/* if you increase this, you must fix field cache impl for
- * getTerms/getTermsIndex requires <= 32768 */
+ * getTerms/getTermsIndex requires <= 32768. Also fix
+ * DeltaBytesWriter's TERM_EOF if necessary. */
final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2;
/* Initial chunks size of the shared int[] blocks used to
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldInvertState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldInvertState.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldInvertState.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FieldInvertState.java Thu Jan 13 02:09:33 2011
@@ -73,6 +73,10 @@ public final class FieldInvertState {
return length;
}
+ public void setLength(int length) {
+ this.length = length;
+ }
+
/**
* Get the number of terms with <code>positionIncrement == 0</code>.
* @return the numOverlap
@@ -81,6 +85,10 @@ public final class FieldInvertState {
return numOverlap;
}
+ public void setNumOverlap(int numOverlap) {
+ this.numOverlap = numOverlap;
+ }
+
/**
* Get end offset of the last processed term.
* @return the offset
@@ -99,6 +107,10 @@ public final class FieldInvertState {
return boost;
}
+ public void setBoost(float boost) {
+ this.boost = boost;
+ }
+
public AttributeSource getAttributeSource() {
return attributeSource;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Thu Jan 13 02:09:33 2011
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
@@ -414,6 +415,11 @@ public class FilterIndexReader extends I
public IndexReader[] getSequentialSubReaders() {
return in.getSequentialSubReaders();
}
+
+ @Override
+ public ReaderContext getTopReaderContext() {
+ return in.getTopReaderContext();
+ }
@Override
public Fields fields() throws IOException {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexReader.java Thu Jan 13 02:09:33 2011
@@ -301,7 +301,7 @@ public abstract class IndexReader implem
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
* @param termInfosIndexDivisor Subsamples which indexed
* terms are loaded into RAM. This has the same effect as {@link
- * IndexWriter#setTermIndexInterval} except that setting
+ * IndexWriterConfig#setTermIndexInterval} except that setting
* must be done at indexing time while this setting can be
* set per reader. When set to N, then one in every
* N*termIndexInterval terms in the index is loaded into
@@ -351,7 +351,7 @@ public abstract class IndexReader implem
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
* @param termInfosIndexDivisor Subsamples which indexed
* terms are loaded into RAM. This has the same effect as {@link
- * IndexWriter#setTermIndexInterval} except that setting
+ * IndexWriterConfig#setTermIndexInterval} except that setting
* must be done at indexing time while this setting can be
* set per reader. When set to N, then one in every
* N*termIndexInterval terms in the index is loaded into
@@ -380,7 +380,7 @@ public abstract class IndexReader implem
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
* @param termInfosIndexDivisor Subsamples which indexed
* terms are loaded into RAM. This has the same effect as {@link
- * IndexWriter#setTermIndexInterval} except that setting
+ * IndexWriterConfig#setTermIndexInterval} except that setting
* must be done at indexing time while this setting can be
* set per reader. When set to N, then one in every
* N*termIndexInterval terms in the index is loaded into
@@ -413,7 +413,7 @@ public abstract class IndexReader implem
* @param readOnly true if no changes (deletions, norms) will be made with this IndexReader
* @param termInfosIndexDivisor Subsamples which indexed
* terms are loaded into RAM. This has the same effect as {@link
- * IndexWriter#setTermIndexInterval} except that setting
+ * IndexWriterConfig#setTermIndexInterval} except that setting
* must be done at indexing time while this setting can be
* set per reader. When set to N, then one in every
* N*termIndexInterval terms in the index is loaded into
@@ -542,7 +542,7 @@ public abstract class IndexReader implem
* file descriptors, CPU time) will be consumed.</p>
*
* <p>For lower latency on reopening a reader, you should
- * call {@link #setMergedSegmentWarmer} to
+ * call {@link IndexWriterConfig#setMergedSegmentWarmer} to
* pre-warm a newly merged segment before it's committed
* to the index. This is important for minimizing
* index-to-search delay after a large merge. </p>
@@ -1142,7 +1142,7 @@ public abstract class IndexReader implem
if (docs == null) return 0;
int n = 0;
int doc;
- while ((doc = docs.nextDoc()) != docs.NO_MORE_DOCS) {
+ while ((doc = docs.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
deleteDocument(doc);
n++;
}
@@ -1372,9 +1372,7 @@ public abstract class IndexReader implem
}
/** Expert: returns the sequential sub readers that this
- * reader is logically composed of. For example,
- * IndexSearcher uses this API to drive searching by one
- * sub reader at a time. If this reader is not composed
+ * reader is logically composed of. If this reader is not composed
* of sequential child readers, it should return null.
* If this method returns an empty array, that means this
* reader is a null reader (for example a MultiReader
@@ -1389,12 +1387,33 @@ public abstract class IndexReader implem
public IndexReader[] getSequentialSubReaders() {
return null;
}
-
-
- /** Expert: returns the docID base for this subReader. */
- public int getSubReaderDocBase(IndexReader subReader) {
- throw new UnsupportedOperationException();
- }
+
+ /**
+ * Expert: Returns the root {@link ReaderContext} for this
+ * {@link IndexReader}'s sub-reader tree. Iff this reader is composed of sub
+ * readers, i.e. this reader being a composite reader, this method returns a
+ * {@link CompositeReaderContext} holding the reader's direct children as well as a
+ * view of the reader tree's atomic leaf contexts. All sub-
+ * {@link ReaderContext} instances referenced from this reader's top-level
+ * context are private to this reader and are not shared with another context
+ * tree. For example, IndexSearcher uses this API to drive searching by one
+ * atomic leaf reader at a time. If this reader is not composed of child
+ * readers, this method returns an {@link AtomicReaderContext}.
+ * <p>
+ * Note: Any of the sub-{@link CompositeReaderContext} instances referenced from this
+ * top-level context holds a <code>null</code> {@link CompositeReaderContext#leaves}
+ * reference. Only the top-level context maintains the convenience leaf-view
+ * for performance reasons.
+ * <p>
+ * NOTE: You should not try using sub-readers returned by this method to make
+ * any changes (setNorm, deleteDocument, etc.). While this might succeed for
+ * one composite reader (like MultiReader), it will most likely lead to index
+ * corruption for other readers (like DirectoryReader obtained through
+ * {@link #open}). Use the top-level context's reader directly.
+ *
+ * @lucene.experimental
+ */
+ public abstract ReaderContext getTopReaderContext();
/** Expert */
public Object getCoreCacheKey() {
@@ -1447,4 +1466,132 @@ public abstract class IndexReader implem
Fields retrieveFields() {
return fields;
}
+
+ /**
+ * A struct like class that represents a hierarchical relationship between
+ * {@link IndexReader} instances.
+ * @lucene.experimental
+ */
+ public static abstract class ReaderContext {
+ /** The reader context for this reader's immediate parent, or null if none */
+ public final ReaderContext parent;
+ /** The actual reader */
+ public final IndexReader reader;
+ /** <code>true</code> iff the reader is an atomic reader */
+ public final boolean isAtomic;
+ /** <code>true</code> if this context struct represents the top level reader within the hierarchical context */
+ public final boolean isTopLevel;
+ /** the doc base for this reader in the parent, <tt>0</tt> if parent is null */
+ public final int docBaseInParent;
+ /** the ord for this reader in the parent, <tt>0</tt> if parent is null */
+ public final int ordInParent;
+
+ ReaderContext(ReaderContext parent, IndexReader reader,
+ boolean isAtomic, int ordInParent, int docBaseInParent) {
+ this.parent = parent;
+ this.reader = reader;
+ this.isAtomic = isAtomic;
+ this.docBaseInParent = docBaseInParent;
+ this.ordInParent = ordInParent;
+ this.isTopLevel = parent==null;
+ }
+
+ /**
+ * Returns the context's leaves if this context is a top-level context
+ * otherwise <code>null</code>.
+ * <p>
+ * Note: this is convenience method since leaves can always be obtained by
+ * walking the context tree.
+ */
+ public AtomicReaderContext[] leaves() {
+ return null;
+ }
+
+ /**
+ * Returns the context's children iff this context is a composite context
+ * otherwise <code>null</code>.
+ * <p>
+ * Note: this method is a convenience method to prevent
+ * <code>instanceof</code> checks and type-casts to
+ * {@link CompositeReaderContext}.
+ */
+ public ReaderContext[] children() {
+ return null;
+ }
+ }
+
+ /**
+ * {@link ReaderContext} for composite {@link IndexReader} instance.
+ * @lucene.experimental
+ */
+ public static final class CompositeReaderContext extends ReaderContext {
+ /** the composite reader's immediate children */
+ public final ReaderContext[] children;
+ /** the composite reader's leaf reader contexts if this is the top level reader in this context */
+ public final AtomicReaderContext[] leaves;
+
+ /**
+ * Creates a {@link CompositeReaderContext} for intermediate readers that are
+ * not top-level readers in the current context
+ */
+ public CompositeReaderContext(ReaderContext parent, IndexReader reader,
+ int ordInParent, int docbaseInParent, ReaderContext[] children) {
+ this(parent, reader, ordInParent, docbaseInParent, children, null);
+ }
+
+ /**
+ * Creates a {@link CompositeReaderContext} for top-level readers with parent set to <code>null</code>
+ */
+ public CompositeReaderContext(IndexReader reader, ReaderContext[] children, AtomicReaderContext[] leaves) {
+ this(null, reader, 0, 0, children, leaves);
+ }
+
+ private CompositeReaderContext(ReaderContext parent, IndexReader reader,
+ int ordInParent, int docbaseInParent, ReaderContext[] children,
+ AtomicReaderContext[] leaves) {
+ super(parent, reader, false, ordInParent, docbaseInParent);
+ this.children = children;
+ this.leaves = leaves;
+ }
+
+ @Override
+ public AtomicReaderContext[] leaves() {
+ return leaves;
+ }
+
+
+ @Override
+ public ReaderContext[] children() {
+ return children;
+ }
+ }
+
+ /**
+ * {@link ReaderContext} for atomic {@link IndexReader} instances
+ * @lucene.experimental
+ */
+ public static final class AtomicReaderContext extends ReaderContext {
+ /** The reader's ord in the top-level's leaves array */
+ public final int ord;
+ /** The reader's absolute doc base */
+ public final int docBase;
+ /**
+ * Creates a new {@link AtomicReaderContext}
+ */
+ public AtomicReaderContext(ReaderContext parent, IndexReader reader,
+ int ord, int docBase, int leafOrd, int leafDocBase) {
+ super(parent, reader, true, ord, docBase);
+ assert reader.getSequentialSubReaders() == null : "Atomic readers must not have subreaders";
+ this.ord = leafOrd;
+ this.docBase = leafDocBase;
+ }
+
+ /**
+ * Creates a new {@link AtomicReaderContext} for an atomic reader without an immediate
+ * parent.
+ */
+ public AtomicReaderContext(IndexReader atomicReader) {
+ this(null, atomicReader, 0, 0, 0, 0);
+ }
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriter.java Thu Jan 13 02:09:33 2011
@@ -2256,11 +2256,11 @@ public class IndexWriter implements Clos
// Now create the compound file if needed
if (useCompoundFile) {
merger.createCompoundFile(mergedName + ".cfs", info);
- info.setUseCompoundFile(true);
// delete new non cfs files directly: they were never
// registered with IFD
- deleter.deleteNewFiles(merger.getMergedFiles(info));
+ deleter.deleteNewFiles(info.files());
+ info.setUseCompoundFile(true);
}
// Register the new segment
@@ -3159,7 +3159,7 @@ public class IndexWriter implements Clos
synchronized(this) {
deleter.deleteFile(compoundFileName);
- deleter.deleteNewFiles(merger.getMergedFiles(merge.info));
+ deleter.deleteNewFiles(merge.info.files());
}
}
}
@@ -3170,7 +3170,7 @@ public class IndexWriter implements Clos
// delete new non cfs files directly: they were never
// registered with IFD
- deleter.deleteNewFiles(merger.getMergedFiles(merge.info));
+ deleter.deleteNewFiles(merge.info.files());
if (merge.isAborted()) {
if (infoStream != null) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/IndexWriterConfig.java Thu Jan 13 02:09:33 2011
@@ -55,7 +55,7 @@ public final class IndexWriterConfig imp
public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
/** Default value is 32. Change using {@link #setTermIndexInterval(int)}. */
- public static final int DEFAULT_TERM_INDEX_INTERVAL = 32;
+ public static final int DEFAULT_TERM_INDEX_INTERVAL = 32; // TODO: this should be private to the codec, not settable here
/** Denotes a flush trigger is disabled. */
public final static int DISABLE_AUTO_FLUSH = -1;
@@ -115,7 +115,7 @@ public final class IndexWriterConfig imp
private OpenMode openMode;
private int maxFieldLength;
private Similarity similarity;
- private int termIndexInterval;
+ private int termIndexInterval; // TODO: this should be private to the codec, not settable here
private MergeScheduler mergeScheduler;
private long writeLockTimeout;
private int maxBufferedDeleteTerms;
@@ -147,7 +147,7 @@ public final class IndexWriterConfig imp
openMode = OpenMode.CREATE_OR_APPEND;
maxFieldLength = UNLIMITED_FIELD_LENGTH;
similarity = Similarity.getDefault();
- termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
+ termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
mergeScheduler = new ConcurrentMergeScheduler();
writeLockTimeout = WRITE_LOCK_TIMEOUT;
maxBufferedDeleteTerms = DEFAULT_MAX_BUFFERED_DELETE_TERMS;
@@ -312,7 +312,7 @@ public final class IndexWriterConfig imp
*
* @see #DEFAULT_TERM_INDEX_INTERVAL
*/
- public IndexWriterConfig setTermIndexInterval(int interval) {
+ public IndexWriterConfig setTermIndexInterval(int interval) { // TODO: this should be private to the codec, not settable here
this.termIndexInterval = interval;
return this;
}
@@ -322,7 +322,7 @@ public final class IndexWriterConfig imp
*
* @see #setTermIndexInterval(int)
*/
- public int getTermIndexInterval() {
+ public int getTermIndexInterval() { // TODO: this should be private to the codec, not settable here
return termIndexInterval;
}
@@ -613,7 +613,7 @@ public final class IndexWriterConfig imp
sb.append("openMode=").append(openMode).append("\n");
sb.append("maxFieldLength=").append(maxFieldLength).append("\n");
sb.append("similarity=").append(similarity.getClass().getName()).append("\n");
- sb.append("termIndexInterval=").append(termIndexInterval).append("\n");
+ sb.append("termIndexInterval=").append(termIndexInterval).append("\n"); // TODO: this should be private to the codec, not settable here
sb.append("mergeScheduler=").append(mergeScheduler.getClass().getName()).append("\n");
sb.append("default WRITE_LOCK_TIMEOUT=").append(WRITE_LOCK_TIMEOUT).append("\n");
sb.append("writeLockTimeout=").append(writeLockTimeout).append("\n");
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/LogMergePolicy.java Thu Jan 13 02:09:33 2011
@@ -241,6 +241,9 @@ public abstract class LogMergePolicy ext
while (start >= 0) {
SegmentInfo info = infos.info(start);
if (size(info) > maxMergeSize || sizeDocs(info) > maxMergeDocs) {
+ if (verbose()) {
+ message("optimize: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSize + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")");
+ }
// need to skip that segment + add a merge for the 'right' segments,
// unless there is only 1 which is optimized.
if (last - start - 1 > 1 || (start != last - 1 && !isOptimized(infos.info(start + 1)))) {
@@ -335,10 +338,18 @@ public abstract class LogMergePolicy ext
int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
assert maxNumSegments > 0;
+ if (verbose()) {
+ message("findMergesForOptimize: maxNumSegs=" + maxNumSegments + " segsToOptimize= "+ segmentsToOptimize);
+ }
// If the segments are already optimized (e.g. there's only 1 segment), or
// there are <maxNumSegements, all optimized, nothing to do.
- if (isOptimized(infos, maxNumSegments, segmentsToOptimize)) return null;
+ if (isOptimized(infos, maxNumSegments, segmentsToOptimize)) {
+ if (verbose()) {
+ message("already optimized; skip");
+ }
+ return null;
+ }
// Find the newest (rightmost) segment that needs to
// be optimized (other segments may have been flushed
@@ -352,10 +363,20 @@ public abstract class LogMergePolicy ext
}
}
- if (last == 0) return null;
+ if (last == 0) {
+ if (verbose()) {
+ message("last == 0; skip");
+ }
+ return null;
+ }
// There is only one segment already, and it is optimized
- if (maxNumSegments == 1 && last == 1 && isOptimized(infos.info(0))) return null;
+ if (maxNumSegments == 1 && last == 1 && isOptimized(infos.info(0))) {
+ if (verbose()) {
+ message("already 1 seg; skip");
+ }
+ return null;
+ }
// Check if there are any segments above the threshold
boolean anyTooLarge = false;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiReader.java Thu Jan 13 02:09:33 2011
@@ -18,14 +18,12 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -35,10 +33,9 @@ import org.apache.lucene.util.ReaderUtil
* their content. */
public class MultiReader extends IndexReader implements Cloneable {
protected IndexReader[] subReaders;
+ private final ReaderContext topLevelContext;
private int[] starts; // 1st docno for each segment
- private final Map<IndexReader,ReaderUtil.Slice> subReaderToSlice = new HashMap<IndexReader,ReaderUtil.Slice>();
private boolean[] decrefOnClose; // remember which subreaders to decRef on close
- private Map<String,byte[]> normsCache = new HashMap<String,byte[]>();
private int maxDoc = 0;
private int numDocs = -1;
private boolean hasDeletions = false;
@@ -51,7 +48,7 @@ public class MultiReader extends IndexRe
* @param subReaders set of (sub)readers
*/
public MultiReader(IndexReader... subReaders) throws IOException {
- initialize(subReaders, true);
+ topLevelContext = initialize(subReaders, true);
}
/**
@@ -63,14 +60,13 @@ public class MultiReader extends IndexRe
* @param subReaders set of (sub)readers
*/
public MultiReader(IndexReader[] subReaders, boolean closeSubReaders) throws IOException {
- initialize(subReaders, closeSubReaders);
+ topLevelContext = initialize(subReaders, closeSubReaders);
}
- private void initialize(IndexReader[] subReaders, boolean closeSubReaders) throws IOException {
+ private ReaderContext initialize(IndexReader[] subReaders, boolean closeSubReaders) throws IOException {
this.subReaders = subReaders.clone();
starts = new int[subReaders.length + 1]; // build starts array
decrefOnClose = new boolean[subReaders.length];
-
for (int i = 0; i < subReaders.length; i++) {
starts[i] = maxDoc;
maxDoc += subReaders[i].maxDoc(); // compute maxDocs
@@ -85,14 +81,9 @@ public class MultiReader extends IndexRe
if (subReaders[i].hasDeletions()) {
hasDeletions = true;
}
-
- final ReaderUtil.Slice slice = new ReaderUtil.Slice(starts[i],
- subReaders[i].maxDoc(),
- i);
- subReaderToSlice.put(subReaders[i], slice);
}
-
starts[subReaders.length] = maxDoc;
+ return ReaderUtil.buildReaderContext(this);
}
@Override
@@ -101,11 +92,6 @@ public class MultiReader extends IndexRe
}
@Override
- public int getSubReaderDocBase(IndexReader subReader) {
- return subReaderToSlice.get(subReader).start;
- }
-
- @Override
public Fields fields() throws IOException {
throw new UnsupportedOperationException("please use MultiFields.getFields, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level Fields");
}
@@ -316,45 +302,18 @@ public class MultiReader extends IndexRe
@Override
public synchronized byte[] norms(String field) throws IOException {
- ensureOpen();
- byte[] bytes = normsCache.get(field);
- if (bytes != null)
- return bytes; // cache hit
- if (!hasNorms(field))
- return null;
-
- bytes = new byte[maxDoc()];
- for (int i = 0; i < subReaders.length; i++)
- subReaders[i].norms(field, bytes, starts[i]);
- normsCache.put(field, bytes); // update cache
- return bytes;
+ throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms");
}
@Override
public synchronized void norms(String field, byte[] result, int offset)
throws IOException {
- ensureOpen();
- byte[] bytes = normsCache.get(field);
- for (int i = 0; i < subReaders.length; i++) // read from segments
- subReaders[i].norms(field, result, offset + starts[i]);
-
- if (bytes==null && !hasNorms(field)) {
- Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f));
- } else if (bytes != null) { // cache hit
- System.arraycopy(bytes, 0, result, offset, maxDoc());
- } else {
- for (int i = 0; i < subReaders.length; i++) { // read from segments
- subReaders[i].norms(field, result, offset + starts[i]);
- }
- }
+ throw new UnsupportedOperationException("please use MultiNorms.norms, or wrap your IndexReader with SlowMultiReaderWrapper, if you really need a top level norms");
}
@Override
protected void doSetNorm(int n, String field, byte value)
throws CorruptIndexException, IOException {
- synchronized (normsCache) {
- normsCache.remove(field); // clear cache
- }
int i = readerIndex(n); // find segment num
subReaders[i].setNorm(n-starts[i], field, value); // dispatch
}
@@ -433,4 +392,8 @@ public class MultiReader extends IndexRe
public IndexReader[] getSequentialSubReaders() {
return subReaders;
}
+
+ public ReaderContext getTopReaderContext() {
+ return topLevelContext;
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NormsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NormsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NormsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NormsWriter.java Thu Jan 13 02:09:33 2011
@@ -37,7 +37,7 @@ import org.apache.lucene.search.Similari
final class NormsWriter extends InvertedDocEndConsumer {
- private static final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
+ private final byte defaultNorm = Similarity.getDefault().encodeNormValue(1.0f);
private FieldInfos fieldInfos;
@Override
public InvertedDocEndConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) {
@@ -89,7 +89,6 @@ final class NormsWriter extends Inverted
}
final String normsFileName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.NORMS_EXTENSION);
- state.flushedFiles.add(normsFileName);
IndexOutput normsOut = state.directory.createOutput(normsFileName);
try {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/NormsWriterPerField.java Thu Jan 13 02:09:33 2011
@@ -73,7 +73,7 @@ final class NormsWriterPerField extends
norms = ArrayUtil.grow(norms, 1+upto);
}
final float norm = docState.similarity.computeNorm(fieldInfo.name, fieldState);
- norms[upto] = Similarity.getDefault().encodeNormValue(norm);
+ norms[upto] = docState.similarity.encodeNormValue(norm);
docIDs[upto] = docState.docID;
upto++;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ParallelReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ParallelReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ParallelReader.java Thu Jan 13 02:09:33 2011
@@ -21,8 +21,11 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
+import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
@@ -53,7 +56,8 @@ public class ParallelReader extends Inde
private SortedMap<String,IndexReader> fieldToReader = new TreeMap<String,IndexReader>();
private Map<IndexReader,Collection<String>> readerToFields = new HashMap<IndexReader,Collection<String>>();
private List<IndexReader> storedFieldReaders = new ArrayList<IndexReader>();
-
+ private Map<String,byte[]> normsCache = new HashMap<String,byte[]>();
+ private final ReaderContext topLevelReaderContext = new AtomicReaderContext(this);
private int maxDoc;
private int numDocs;
private boolean hasDeletions;
@@ -88,7 +92,7 @@ public class ParallelReader extends Inde
buffer.append(')');
return buffer.toString();
}
-
+
/** Add an IndexReader.
* @throws IOException if there is a low-level IO error
*/
@@ -141,6 +145,9 @@ public class ParallelReader extends Inde
reader.incRef();
}
decrefOnClose.add(Boolean.valueOf(incRefReaders));
+ synchronized(normsCache) {
+ normsCache.clear(); // TODO: don't need to clear this for all fields really?
+ }
}
private class ParallelFieldsEnum extends FieldsEnum {
@@ -278,6 +285,7 @@ public class ParallelReader extends Inde
if (reopened) {
List<Boolean> newDecrefOnClose = new ArrayList<Boolean>();
+ // TODO: maybe add a special reopen-ctor for norm-copying?
ParallelReader pr = new ParallelReader();
for (int i = 0; i < readers.size(); i++) {
IndexReader oldReader = readers.get(i);
@@ -419,27 +427,51 @@ public class ParallelReader extends Inde
}
@Override
- public byte[] norms(String field) throws IOException {
+ public synchronized byte[] norms(String field) throws IOException {
ensureOpen();
IndexReader reader = fieldToReader.get(field);
- return reader==null ? null : reader.norms(field);
+
+ if (reader==null)
+ return null;
+
+ byte[] bytes = normsCache.get(field);
+ if (bytes != null)
+ return bytes;
+ if (!hasNorms(field))
+ return null;
+
+ bytes = MultiNorms.norms(reader, field);
+ normsCache.put(field, bytes);
+ return bytes;
}
@Override
- public void norms(String field, byte[] result, int offset)
+ public synchronized void norms(String field, byte[] result, int offset)
throws IOException {
+ // TODO: maybe optimize
ensureOpen();
IndexReader reader = fieldToReader.get(field);
- if (reader!=null)
- reader.norms(field, result, offset);
+ if (reader==null)
+ return;
+
+ byte[] norms = norms(field);
+ if (norms == null) {
+ Arrays.fill(result, offset, result.length, Similarity.getDefault().encodeNormValue(1.0f));
+ } else {
+ System.arraycopy(norms, 0, result, offset, maxDoc());
+ }
}
@Override
protected void doSetNorm(int n, String field, byte value)
throws CorruptIndexException, IOException {
IndexReader reader = fieldToReader.get(field);
- if (reader!=null)
+ if (reader!=null) {
+ synchronized(normsCache) {
+ normsCache.remove(field);
+ }
reader.doSetNorm(n, field, value);
+ }
}
@Override
@@ -529,6 +561,11 @@ public class ParallelReader extends Inde
}
return fieldSet;
}
+ @Override
+ public ReaderContext getTopReaderContext() {
+ return topLevelReaderContext;
+ }
+
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentMerger.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentMerger.java Thu Jan 13 02:09:33 2011
@@ -20,8 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Set;
-import java.util.HashSet;
import java.util.List;
import org.apache.lucene.document.Document;
@@ -127,41 +125,11 @@ final class SegmentMerger {
return mergedDocs;
}
- final Collection<String> getMergedFiles(final SegmentInfo info) throws IOException {
- Set<String> fileSet = new HashSet<String>();
-
- // Basic files
- for (String ext : IndexFileNames.COMPOUND_EXTENSIONS_NOT_CODEC) {
- fileSet.add(IndexFileNames.segmentFileName(segment, "", ext));
- }
-
- segmentWriteState.segmentCodecs.files(directory, info, fileSet);
-
- // Fieldable norm files
- int numFIs = fieldInfos.size();
- for (int i = 0; i < numFIs; i++) {
- FieldInfo fi = fieldInfos.fieldInfo(i);
- if (fi.isIndexed && !fi.omitNorms) {
- fileSet.add(IndexFileNames.segmentFileName(segment, "", IndexFileNames.NORMS_EXTENSION));
- break;
- }
- }
-
- // Vector files
- if (fieldInfos.hasVectors()) {
- for (String ext : IndexFileNames.VECTOR_EXTENSIONS) {
- fileSet.add(IndexFileNames.segmentFileName(segment, "", ext));
- }
- }
-
- return fileSet;
- }
-
final Collection<String> createCompoundFile(String fileName, final SegmentInfo info)
throws IOException {
// Now merge all added files
- Collection<String> files = getMergedFiles(info);
+ Collection<String> files = info.files();
CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
for (String file : files) {
cfsWriter.addFile(file);
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReadState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReadState.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReadState.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReadState.java Thu Jan 13 02:09:33 2011
@@ -33,7 +33,7 @@ public class SegmentReadState {
// terms index on init (preflex is the only once currently
// that must do so), then it should negate this value to
// get the app's terms divisor:
- public final int termsIndexDivisor;
+ public int termsIndexDivisor;
public final String codecId;
public SegmentReadState(Directory dir, SegmentInfo info,
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentReader.java Thu Jan 13 02:09:33 2011
@@ -51,7 +51,7 @@ public class SegmentReader extends Index
private SegmentInfo si;
private int readBufferSize;
-
+ private final ReaderContext readerContext = new AtomicReaderContext(this);
CloseableThreadLocal<FieldsReader> fieldsReaderLocal = new FieldsReaderLocal();
CloseableThreadLocal<TermVectorsReader> termVectorsLocal = new CloseableThreadLocal<TermVectorsReader>();
@@ -1183,6 +1183,11 @@ public class SegmentReader extends Index
buffer.append(si.toString(core.dir, pendingDeleteCount));
return buffer.toString();
}
+
+ @Override
+ public ReaderContext getTopReaderContext() {
+ return readerContext;
+ }
/**
* Return the name of the segment this reader is reading.
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SegmentWriteState.java Thu Jan 13 02:09:33 2011
@@ -18,8 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.PrintStream;
-import java.util.Collection;
-import java.util.HashSet;
import org.apache.lucene.store.Directory;
@@ -33,7 +31,6 @@ public class SegmentWriteState {
public final FieldInfos fieldInfos;
public final int numDocs;
public boolean hasVectors;
- public final Collection<String> flushedFiles;
final SegmentCodecs segmentCodecs;
public final String codecId;
@@ -43,7 +40,7 @@ public class SegmentWriteState {
* faster, while larger values use less memory and make searching slightly
* slower. Searching is typically not dominated by dictionary lookup, so
* tweaking this is rarely useful.*/
- public final int termIndexInterval;
+ public int termIndexInterval; // TODO: this should be private to the codec, not settable here or in IWC
/** Expert: The fraction of TermDocs entries stored in skip tables,
* used to accelerate {@link DocsEnum#advance(int)}. Larger values result in
@@ -68,7 +65,6 @@ public class SegmentWriteState {
this.numDocs = numDocs;
this.termIndexInterval = termIndexInterval;
this.segmentCodecs = segmentCodecs;
- flushedFiles = new HashSet<String>();
codecId = "";
}
@@ -83,7 +79,6 @@ public class SegmentWriteState {
numDocs = state.numDocs;
termIndexInterval = state.termIndexInterval;
segmentCodecs = state.segmentCodecs;
- flushedFiles = state.flushedFiles;
this.codecId = codecId;
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/SlowMultiReaderWrapper.java Thu Jan 13 02:09:33 2011
@@ -18,11 +18,19 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Map;
+
+import org.apache.lucene.search.Similarity;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ReaderUtil; // javadoc
import org.apache.lucene.index.DirectoryReader; // javadoc
import org.apache.lucene.index.MultiReader; // javadoc
+import org.apache.lucene.index.IndexReader.ReaderContext;
/**
* This class forces a composite reader (eg a {@link
@@ -48,8 +56,12 @@ import org.apache.lucene.index.MultiRead
public final class SlowMultiReaderWrapper extends FilterIndexReader {
+ private final ReaderContext readerContext;
+ private final Map<String,byte[]> normsCache = new HashMap<String,byte[]>();
+
public SlowMultiReaderWrapper(IndexReader other) {
super(other);
+ readerContext = new AtomicReaderContext(this); // emulate atomic reader!
}
@Override
@@ -62,6 +74,7 @@ public final class SlowMultiReaderWrappe
return MultiFields.getDeletedDocs(in);
}
+
@Override
public IndexReader[] getSequentialSubReaders() {
return null;
@@ -71,4 +84,44 @@ public final class SlowMultiReaderWrappe
public String toString() {
return "SlowMultiReaderWrapper(" + in + ")";
}
+
+ @Override
+ public synchronized byte[] norms(String field) throws IOException {
+ ensureOpen();
+ byte[] bytes = normsCache.get(field);
+ if (bytes != null)
+ return bytes;
+ if (!hasNorms(field))
+ return null;
+
+ bytes = MultiNorms.norms(in, field);
+ normsCache.put(field, bytes);
+ return bytes;
+ }
+
+ @Override
+ public synchronized void norms(String field, byte[] bytes, int offset) throws IOException {
+ // TODO: maybe optimize
+ ensureOpen();
+ byte[] norms = norms(field);
+ if (norms == null) {
+ Arrays.fill(bytes, offset, bytes.length, Similarity.getDefault().encodeNormValue(1.0f));
+ } else {
+ System.arraycopy(norms, 0, bytes, offset, maxDoc());
+ }
+ }
+
+ @Override
+ public ReaderContext getTopReaderContext() {
+ return readerContext;
+ }
+
+ @Override
+ protected void doSetNorm(int n, String field, byte value)
+ throws CorruptIndexException, IOException {
+ synchronized(normsCache) {
+ normsCache.remove(field);
+ }
+ in.doSetNorm(n, field, value);
+ }
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java?rev=1058390&r1=1058389&r2=1058390&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java Thu Jan 13 02:09:33 2011
@@ -53,11 +53,7 @@ final class StoredFieldsWriter {
fieldsWriter = null;
lastDocID = 0;
- String fieldsName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.FIELDS_EXTENSION);
String fieldsIdxName = IndexFileNames.segmentFileName(state.segmentName, "", IndexFileNames.FIELDS_INDEX_EXTENSION);
- state.flushedFiles.add(fieldsName);
- state.flushedFiles.add(fieldsIdxName);
-
if (4 + ((long) state.numDocs) * 8 != state.directory.fileLength(fieldsIdxName)) {
throw new RuntimeException("after flush: fdx size mismatch: " + state.numDocs + " docs vs " + state.directory.fileLength(fieldsIdxName) + " length in bytes of " + fieldsIdxName + " file exists?=" + state.directory.fileExists(fieldsIdxName));
}