Posted to commits@lucene.apache.org by mi...@apache.org on 2014/12/07 12:37:35 UTC
svn commit: r1643662 [3/6] - in /lucene/dev/branches/lucene6005: ./ lucene/
lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/
lucene/core/ lucene/core/src/java/org/apache/lucene/codecs/
lucene/core/src/java/org/apache/lucene/co...
Modified: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/TestSearcherManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/TestSearcherManager.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/TestSearcherManager.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/TestSearcherManager.java Sun Dec 7 11:37:32 2014
@@ -30,16 +30,20 @@ import java.util.concurrent.atomic.Atomi
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FilterDirectoryReader;
+import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.ThreadedIndexingAndSearchingTestCase;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NamedThreadFactory;
import org.apache.lucene.util.TestUtil;
@@ -444,5 +448,51 @@ public class TestSearcherManager extends
sm.close();
dir.close();
}
-
+
+ private static class MyFilterLeafReader extends FilterLeafReader {
+ public MyFilterLeafReader(LeafReader in) {
+ super(in);
+ }
+ }
+
+ private static class MyFilterDirectoryReader extends FilterDirectoryReader {
+ public MyFilterDirectoryReader(DirectoryReader in) {
+ super(in,
+ new FilterDirectoryReader.SubReaderWrapper() {
+ @Override
+ public LeafReader wrap(LeafReader reader) {
+ return new MyFilterLeafReader(reader);
+ }
+ });
+ }
+
+ @Override
+ protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) {
+ return new MyFilterDirectoryReader(in);
+ }
+ }
+
+ // LUCENE-6087
+ public void testCustomDirectoryReader() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+ DirectoryReader reader = new MyFilterDirectoryReader(w.getReader());
+ SearcherManager mgr = new SearcherManager(reader, null);
+ for(int i=0;i<10;i++) {
+ w.addDocument(w.newDocument());
+ mgr.maybeRefresh();
+ IndexSearcher s = mgr.acquire();
+ try {
+ assertTrue(s.getIndexReader() instanceof MyFilterDirectoryReader);
+ for (LeafReaderContext ctx : s.getIndexReader().leaves()) {
+ assertTrue(ctx.reader() instanceof MyFilterLeafReader);
+ }
+ } finally {
+ mgr.release(s);
+ }
+ }
+ mgr.close();
+ w.close();
+ dir.close();
+ }
}
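For context: the new test exercises LUCENE-6087, which lets SearcherManager be constructed from an already-wrapped DirectoryReader and preserve that wrapping across refreshes (the re-open path goes through doWrapDirectoryReader). A minimal usage sketch, assuming the MyFilterDirectoryReader class defined in the test above; not part of the commit:

    DirectoryReader wrapped = new MyFilterDirectoryReader(DirectoryReader.open(dir));
    SearcherManager mgr = new SearcherManager(wrapped, null);
    // ... an IndexWriter on the same Directory adds documents ...
    mgr.maybeRefresh();                 // re-opens; the wrapping is re-applied
    IndexSearcher s = mgr.acquire();
    try {
      // still a MyFilterDirectoryReader after the refresh
      assert s.getIndexReader() instanceof MyFilterDirectoryReader;
    } finally {
      mgr.release(s);
    }
    mgr.close();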
Copied: lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java (from r1643659, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java?p2=lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java&p1=lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java&r1=1643659&r2=1643662&rev=1643662&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java (original)
+++ lucene/dev/branches/lucene6005/lucene/core/src/test/org/apache/lucene/search/TestUsageTrackingFilterCachingPolicy.java Sun Dec 7 11:37:32 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.search;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.RoaringDocIdSet;
public class TestUsageTrackingFilterCachingPolicy extends LuceneTestCase {
@@ -37,7 +38,7 @@ public class TestUsageTrackingFilterCach
public void testCostlyFilter() {
assertTrue(UsageTrackingFilterCachingPolicy.isCostly(new PrefixFilter(new Term("field", "prefix"))));
- assertTrue(UsageTrackingFilterCachingPolicy.isCostly(NumericRangeFilter.newIntRange("intField", 8, 1, 1000, true, true)));
+ assertTrue(UsageTrackingFilterCachingPolicy.isCostly(new TermRangeFilter("intField", NumericUtils.intToBytes(1), NumericUtils.intToBytes(1000), true, true)));
assertFalse(UsageTrackingFilterCachingPolicy.isCostly(new QueryWrapperFilter(new TermQuery(new Term("field", "value")))));
}
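The lucene6005 branch drops NumericRangeFilter (field types own the numeric encoding), so the test now builds an equivalent TermRangeFilter over NumericUtils.intToBytes bounds. The point of isCostly() is that costly filters (prefix- and range-style) are worth caching after fewer uses than cheap ones. A sketch that simply mirrors the updated assertions:

    Filter cheap  = new QueryWrapperFilter(new TermQuery(new Term("field", "value")));
    Filter costly = new TermRangeFilter("intField",
        NumericUtils.intToBytes(1), NumericUtils.intToBytes(1000), true, true);
    assert UsageTrackingFilterCachingPolicy.isCostly(costly);
    assert UsageTrackingFilterCachingPolicy.isCostly(cheap) == false;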
Modified: lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/facet/src/test/org/apache/lucene/facet/SlowRAMDirectory.java Sun Dec 7 11:37:32 2014
@@ -143,6 +143,7 @@ public class SlowRAMDirectory extends RA
private final Random rand;
public SlowIndexOutput(IndexOutput io) {
+ super("SlowIndexOutput(" + io + ")");
this.io = io;
this.rand = forkRandom();
}
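The added super(...) call reflects that IndexOutput's constructor now takes a resource description (surfaced by toString() and in exception messages). A hedged sketch of a delegating IndexOutput under that assumption; the class name is illustrative:

    // Minimal pass-through IndexOutput showing the new required
    // resource-description constructor argument.
    class DelegatingIndexOutput extends IndexOutput {
      private final IndexOutput out;
      DelegatingIndexOutput(IndexOutput out) {
        super("DelegatingIndexOutput(" + out + ")");
        this.out = out;
      }
      @Override public void writeByte(byte b) throws IOException { out.writeByte(b); }
      @Override public void writeBytes(byte[] b, int off, int len) throws IOException { out.writeBytes(b, off, len); }
      @Override public long getFilePointer() { return out.getFilePointer(); }
      @Override public long getChecksum() throws IOException { return out.getChecksum(); }
      @Override public void close() throws IOException { out.close(); }
    }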
Modified: lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java (original)
+++ lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java Sun Dec 7 11:37:32 2014
@@ -265,7 +265,8 @@ public class QueryScorer implements Scor
* {@link CachingTokenFilter} are wrapped in a {@link CachingTokenFilter} to
* ensure an efficient reset - if you are already using a different caching
* {@link TokenStream} impl and you don't want it to be wrapped, set this to
- * false.
+ * false. Note that term-vector based tokenstreams are detected and won't be
+ * wrapped either, since they can already be reset efficiently.
*/
public void setWrapIfNotCachingTokenFilter(boolean wrap) {
this.wrapToCaching = wrap;
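Background for the javadoc change: the highlighter may consume the supplied TokenStream more than once, so streams that aren't already caching get wrapped in a CachingTokenFilter for an efficient reset; term-vector based streams reset cheaply and are now left unwrapped. A usage sketch (field name, stream and text are illustrative):

    QueryScorer scorer = new QueryScorer(query, "contents");
    // Opt out only if your stream already supports a cheap reset:
    scorer.setWrapIfNotCachingTokenFilter(false);
    Highlighter highlighter = new Highlighter(scorer);
    String fragment = highlighter.getBestFragment(tokenStream, text);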
Modified: lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Sun Dec 7 11:37:32 2014
@@ -36,7 +36,7 @@ import org.apache.lucene.index.Terms;
*/
public class TokenSources {
/**
- * A convenience method that tries to first get a TermPositionVector for the
+ * A convenience method that first tries to get a {@link TokenStreamFromTermVector} for the
* specified docId, then, falls back to using the passed in
* {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
* This is useful when you already have the document, but would prefer to use
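In other words, getAnyTokenStream prefers re-playing the indexed term vector (via TokenStreamFromTermVector) and falls back to re-analyzing the stored field value. A sketch of a typical call, with docId, field name and analyzer illustrative:

    Document doc = searcher.doc(docId);
    TokenStream ts = TokenSources.getAnyTokenStream(
        searcher.getIndexReader(), docId, "contents", doc, analyzer);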
Modified: lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/branches/lucene6005/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Sun Dec 7 11:37:32 2014
@@ -16,6 +16,7 @@ package org.apache.lucene.search.highlig
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
@@ -29,13 +30,13 @@ import java.util.TreeSet;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.FilterLeafReader;
-import org.apache.lucene.index.LeafReader;
-import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
@@ -43,7 +44,18 @@ import org.apache.lucene.index.TermConte
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.CommonTermsQuery;
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DisjunctionMaxQuery;
+import org.apache.lucene.search.FilteredQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.join.ToChildBlockJoinQuery;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
@@ -65,7 +77,7 @@ import org.apache.lucene.util.IOUtils;
public class WeightedSpanTermExtractor {
private String fieldName;
- private TokenStream tokenStream;
+ private TokenStream tokenStream; // set by the getWeightedSpanTerms* methods
private String defaultField;
private boolean expandMultiTermQuery;
private boolean cachedTokenStream;
@@ -209,6 +221,8 @@ public class WeightedSpanTermExtractor {
sp.setBoost(query.getBoost());
extractWeightedSpanTerms(terms, sp);
}
+ } else if (query instanceof MatchAllDocsQuery) {
+ // nothing to extract
} else {
Query origQuery = query;
if (query instanceof MultiTermQuery) {
@@ -357,18 +371,39 @@ public class WeightedSpanTermExtractor {
protected LeafReaderContext getLeafContext() throws IOException {
if (internalReader == null) {
- if(wrapToCaching && !(tokenStream instanceof CachingTokenFilter)) {
- assert !cachedTokenStream;
- tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
- cachedTokenStream = true;
- }
- final MemoryIndex indexer = new MemoryIndex(true);
- indexer.addField(DelegatingLeafReader.FIELD_NAME, tokenStream);
- tokenStream.reset();
- final IndexSearcher searcher = indexer.createSearcher();
- // MEM index has only atomic ctx
- internalReader = new DelegatingLeafReader(((LeafReaderContext)searcher.getTopReaderContext()).reader());
+ boolean cacheIt = wrapToCaching && !(tokenStream instanceof CachingTokenFilter);
+
+ // If it's from term vectors, simply wrap the underlying Terms in a reader
+ if (tokenStream instanceof TokenStreamFromTermVector) {
+ cacheIt = false;
+ Terms termVectorTerms = ((TokenStreamFromTermVector) tokenStream).getTermVectorTerms();
+ if (termVectorTerms.hasPositions() && termVectorTerms.hasOffsets()) {
+ internalReader = new TermVectorLeafReader(DelegatingLeafReader.FIELD_NAME, termVectorTerms);
+ }
+ }
+
+ // Use MemoryIndex (index/invert this tokenStream now)
+ if (internalReader == null) {
+ final MemoryIndex indexer = new MemoryIndex(true);
+ if (cacheIt) {
+ assert !cachedTokenStream;
+ tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
+ cachedTokenStream = true;
+ indexer.addField(DelegatingLeafReader.FIELD_NAME, tokenStream);
+ } else {
+ indexer.addField(DelegatingLeafReader.FIELD_NAME,
+ new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
+ }
+ tokenStream.reset();//reset to beginning when we return
+ final IndexSearcher searcher = indexer.createSearcher();
+ // MEM index has only atomic ctx
+ internalReader = ((LeafReaderContext) searcher.getTopReaderContext()).reader();
+ }
+
+ //Now wrap it so we always use a common field.
+ this.internalReader = new DelegatingLeafReader(internalReader);
}
+
return internalReader.getContext();
}
@@ -532,7 +567,7 @@ public class WeightedSpanTermExtractor {
return terms;
}
-
+
protected void collectSpanQueryFields(SpanQuery spanQuery, Set<String> fieldNames) {
if (spanQuery instanceof FieldMaskingSpanQuery) {
collectSpanQueryFields(((FieldMaskingSpanQuery)spanQuery).getMaskedQuery(), fieldNames);
@@ -622,8 +657,11 @@ public class WeightedSpanTermExtractor {
public boolean isCachedTokenStream() {
return cachedTokenStream;
}
-
+
+ /** Returns the tokenStream, which may have been wrapped in a CachingTokenFilter.
+ * The getWeightedSpanTerms* methods set the tokenStream, so don't call this before one of them. */
public TokenStream getTokenStream() {
+ assert tokenStream != null;
return tokenStream;
}
@@ -632,12 +670,16 @@ public class WeightedSpanTermExtractor {
* {@link CachingTokenFilter} are wrapped in a {@link CachingTokenFilter} to
* ensure an efficient reset - if you are already using a different caching
* {@link TokenStream} impl and you don't want it to be wrapped, set this to
- * false.
+ * false. This setting is ignored when a term vector based TokenStream is supplied,
+ * since it can be reset efficiently.
*/
public void setWrapIfNotCachingTokenFilter(boolean wrap) {
this.wrapToCaching = wrap;
}
+ /** A threshold on the number of characters to analyze. When a TokenStream based on
+ * term vectors with offsets and positions is supplied, this setting
+ * does not apply. */
protected final void setMaxDocCharsToAnalyze(int maxDocCharsToAnalyze) {
this.maxDocCharsToAnalyze = maxDocCharsToAnalyze;
}
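To summarize the new getLeafContext() flow: if the TokenStream came from term vectors that store both positions and offsets, the Terms are wrapped directly in a TermVectorLeafReader (no re-inversion; the caching and char-limit settings are ignored); otherwise the stream is inverted into a MemoryIndex as before, optionally wrapped in a CachingTokenFilter first. A sketch of the MemoryIndex fallback in isolation, with an illustrative field name:

    MemoryIndex indexer = new MemoryIndex(true); // true: store offsets
    indexer.addField("f", tokenStream);          // consumes and inverts the stream
    IndexSearcher searcher = indexer.createSearcher();
    // a MemoryIndex always exposes exactly one leaf
    LeafReader leaf = ((LeafReaderContext) searcher.getTopReaderContext()).reader();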
Modified: lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Sun Dec 7 11:37:32 2014
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -35,6 +36,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldTypes;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@@ -73,7 +75,7 @@ public class HighlighterTest extends Bas
Directory ramDir;
public IndexSearcher searcher = null;
int numHighlights = 0;
- Analyzer analyzer;
+ MockAnalyzer analyzer;
TopDocs hits;
String[] texts = {
@@ -116,9 +118,8 @@ public class HighlighterTest extends Bas
}
public void testHighlightingCommonTermsQuery() throws Exception {
- Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
- query.add(new Term(FIELD_NAME, "this"));
+ query.add(new Term(FIELD_NAME, "this"));//stop-word
query.add(new Term(FIELD_NAME, "long"));
query.add(new Term(FIELD_NAME, "very"));
@@ -136,7 +137,7 @@ public class HighlighterTest extends Bas
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
highlighter.setTextFragmenter(fragmenter);
String fragment = highlighter.getBestFragment(stream, storedField);
- assertEquals("Hello <B>this</B> is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
+ assertEquals("Hello this is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
doc = searcher.doc(hits.scoreDocs[1].doc);
storedField = doc.getString(FIELD_NAME);
@@ -145,7 +146,7 @@ public class HighlighterTest extends Bas
.getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer);
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
fragment = highlighter.getBestFragment(stream, storedField);
- assertEquals("<B>This</B> piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
+ assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
}
public void testHighlightUnknowQueryAfterRewrite() throws IOException, InvalidTokenOffsetsException {
@@ -154,7 +155,7 @@ public class HighlighterTest extends Bas
@Override
public Query rewrite(IndexReader reader) throws IOException {
CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 3);
- query.add(new Term(FIELD_NAME, "this"));
+ query.add(new Term(FIELD_NAME, "this"));//stop-word
query.add(new Term(FIELD_NAME, "long"));
query.add(new Term(FIELD_NAME, "very"));
return query;
@@ -175,9 +176,7 @@ public class HighlighterTest extends Bas
return super.equals(obj);
}
};
-
- Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
-
+
searcher = newSearcher(reader);
TopDocs hits = searcher.search(query, 10);
assertEquals(2, hits.totalHits);
@@ -192,7 +191,7 @@ public class HighlighterTest extends Bas
Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
highlighter.setTextFragmenter(fragmenter);
String fragment = highlighter.getBestFragment(stream, storedField);
- assertEquals("Hello <B>this</B> is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
+ assertEquals("Hello this is a piece of text that is <B>very</B> <B>long</B> and contains too much preamble and the meat is really here which says kennedy has been shot", fragment);
doc = searcher.doc(hits.scoreDocs[1].doc);
storedField = doc.getString(FIELD_NAME);
@@ -201,7 +200,7 @@ public class HighlighterTest extends Bas
.getIndexReader(), hits.scoreDocs[1].doc, FIELD_NAME, doc, analyzer);
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer));
fragment = highlighter.getBestFragment(stream, storedField);
- assertEquals("<B>This</B> piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
+ assertEquals("This piece of text refers to Kennedy at the beginning then has a longer piece of text that is <B>very</B>", fragment);
}
@@ -247,8 +246,7 @@ public class HighlighterTest extends Bas
*/
private String highlightField(Query query, String fieldName, String text)
throws IOException, InvalidTokenOffsetsException {
- TokenStream tokenStream = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)
- .tokenStream(fieldName, text);
+ TokenStream tokenStream = analyzer.tokenStream(fieldName, text);
// Assuming "<B>", "</B>" used to highlight
SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME);
@@ -346,8 +344,9 @@ public class HighlighterTest extends Bas
Highlighter highlighter = new Highlighter(this, scorer);
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@@ -375,8 +374,9 @@ public class HighlighterTest extends Bas
highlighter = new Highlighter(this, scorer);
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@@ -404,8 +404,9 @@ public class HighlighterTest extends Bas
highlighter = new Highlighter(this, scorer);
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@@ -429,8 +430,9 @@ public class HighlighterTest extends Bas
Highlighter highlighter = new Highlighter(this, scorer);
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@@ -453,8 +455,9 @@ public class HighlighterTest extends Bas
Highlighter highlighter = new Highlighter(this, scorer);
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@@ -477,9 +480,9 @@ public class HighlighterTest extends Bas
Highlighter highlighter = new Highlighter(this, scorer);
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
-
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
highlighter.setTextFragmenter(new SimpleFragmenter(40));
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
@@ -562,7 +565,8 @@ public class HighlighterTest extends Bas
for (int i = 0; i < hits.totalHits; i++) {
String text = "parent document";
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
highlighter.setTextFragmenter(new SimpleFragmenter(40));
highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...");
@@ -587,8 +591,9 @@ public class HighlighterTest extends Bas
highlighter.setTextFragmenter(new SimpleFragmenter(40));
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"...");
@@ -609,8 +614,9 @@ public class HighlighterTest extends Bas
int maxNumFragmentsRequired = 2;
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
Highlighter highlighter = new Highlighter(this, scorer);
@@ -639,8 +645,9 @@ public class HighlighterTest extends Bas
Highlighter highlighter = new Highlighter(this, scorer);
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5));
@@ -693,8 +700,9 @@ public class HighlighterTest extends Bas
Highlighter highlighter = new Highlighter(this,scorer);
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@@ -764,8 +772,9 @@ public class HighlighterTest extends Bas
highlighter.setTextFragmenter(new SimpleFragmenter(40));
int maxNumFragmentsRequired = 2;
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
"...");
@@ -958,11 +967,12 @@ public class HighlighterTest extends Bas
hits = searcher.search(query, null, 1000);
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(HighlighterTest.FIELD_NAME);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2;
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
String fragmentSeparator = "...";
QueryScorer scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, text);
Highlighter highlighter = new Highlighter(this, scorer);
@@ -982,11 +992,12 @@ public class HighlighterTest extends Bas
numHighlights = 0;
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(HighlighterTest.FIELD_NAME);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(HighlighterTest.FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
int maxNumFragmentsRequired = 2;
String fragmentSeparator = "...";
QueryScorer scorer = new QueryScorer(query, null);
- TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, text);
Highlighter highlighter = new Highlighter(this, scorer);
@@ -1006,11 +1017,12 @@ public class HighlighterTest extends Bas
numHighlights = 0;
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(HighlighterTest.FIELD_NAME);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2;
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
String fragmentSeparator = "...";
QueryScorer scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, text);
Highlighter highlighter = new Highlighter(this, scorer);
@@ -1180,8 +1192,9 @@ public class HighlighterTest extends Bas
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
numHighlights = 0;
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);
@@ -1194,21 +1207,25 @@ public class HighlighterTest extends Bas
numHighlights = 0;
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);
- highlighter.getBestFragment(analyzer, FIELD_NAME, text);
+ highlighter.getBestFragment(tokenStream, text);
}
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
numHighlights == 4);
numHighlights = 0;
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);
- highlighter.getBestFragments(analyzer, FIELD_NAME, text, 10);
+ highlighter.getBestFragments(tokenStream, text, 10);
}
assertTrue("Failed to find correct number of highlights " + numHighlights + " found",
numHighlights == 4);
@@ -1334,8 +1351,9 @@ public class HighlighterTest extends Bas
doSearching(new TermQuery(new Term(FIELD_NAME, "kennedy")));
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
Highlighter highlighter = getHighlighter(query, FIELD_NAME,
HighlighterTest.this);// new Highlighter(this, new
@@ -1363,9 +1381,10 @@ public class HighlighterTest extends Bas
}
public void testMaxSizeHighlight() throws Exception {
- final MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
- // we disable MockTokenizer checks because we will forcefully limit the
+ // we disable MockTokenizer checks because we will forcefully limit the
// tokenstream and call end() before incrementToken() returns false.
+ // But first we must clear the re-used tokenstream components that still have checks enabled.
+ analyzer.getReuseStrategy().setReusableComponents(analyzer, FIELD_NAME, null);
analyzer.setEnableChecks(false);
TestHighlightRunner helper = new TestHighlightRunner() {
@@ -1466,8 +1485,7 @@ public class HighlighterTest extends Bas
numHighlights = 0;
// test to show how rewritten query can still be used
searcher = newSearcher(reader);
- Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
-
+
BooleanQuery query = new BooleanQuery();
query.add(new WildcardQuery(new Term(FIELD_NAME, "jf?")), Occur.SHOULD);
query.add(new WildcardQuery(new Term(FIELD_NAME, "kenned*")), Occur.SHOULD);
@@ -1486,8 +1504,9 @@ public class HighlighterTest extends Bas
int maxNumFragmentsRequired = 3;
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(FIELD_NAME);
- TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, text);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(FIELD_NAME);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, hits.scoreDocs[i].doc, FIELD_NAME, doc, analyzer);
Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this, false);
highlighter.setTextFragmenter(new SimpleFragmenter(40));
@@ -1820,7 +1839,7 @@ public class HighlighterTest extends Bas
private Document doc(IndexWriter writer, String f, String v) {
Document doc = writer.newDocument();
- doc.addLargeText( f, v);
+ doc.addLargeText(f, v);
return doc;
}
@@ -1862,6 +1881,39 @@ public class HighlighterTest extends Bas
reader.close();
}
+ /** If we have term vectors, we can highlight based on payloads */
+ public void testPayloadQuery() throws IOException, InvalidTokenOffsetsException {
+ final String text = "random words and words";//"words" at positions 1 & 4
+
+ Analyzer analyzer = new MockPayloadAnalyzer();//sets payload to "pos: X" (where X is position #)
+ Directory dir = newDirectory();
+ try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
+ Document doc = writer.newDocument();
+ FieldTypes fieldTypes = writer.getFieldTypes();
+ fieldTypes.enableTermVectors(FIELD_NAME);
+ fieldTypes.enableTermVectorPositions(FIELD_NAME);
+ fieldTypes.enableTermVectorOffsets(FIELD_NAME);
+ fieldTypes.enableTermVectorPayloads(FIELD_NAME);
+ doc.addLargeText(FIELD_NAME, text);
+ writer.addDocument(doc);
+ writer.commit();
+ }
+ try (IndexReader reader = DirectoryReader.open(dir)) {
+ Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "words")),
+ Collections.singleton("pos: 1".getBytes("UTF-8"))); // just match the first "words" occurrence
+ IndexSearcher searcher = newSearcher(reader);
+ Scorer scorer = new QueryScorer(query, searcher.getIndexReader(), FIELD_NAME);
+ Highlighter h = new Highlighter(scorer);
+
+ TopDocs hits = searcher.search(query, null, 10);
+ assertEquals(1, hits.scoreDocs.length);
+ TokenStream stream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), 0, FIELD_NAME, analyzer);
+ String result = h.getBestFragment(stream, text);
+ assertEquals("random <B>words</B> and words", result); // only the first "words" is highlighted
+ }
+ dir.close();
+ }
+
/*
*
* public void testBigramAnalyzer() throws IOException, ParseException {
@@ -1929,14 +1981,26 @@ public class HighlighterTest extends Bas
public void setUp() throws Exception {
super.setUp();
+ // Not many tests use this setup:
a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
- analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
dir = newDirectory();
+
+ //Most tests use this setup:
+ analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
ramDir = newDirectory();
IndexWriter writer = new IndexWriter(ramDir, newIndexWriterConfig(analyzer));
+ if (random().nextBoolean()) {
+ FieldTypes fieldTypes = writer.getFieldTypes();
+ fieldTypes.enableTermVectors(FIELD_NAME);
+ fieldTypes.enableTermVectorPositions(FIELD_NAME);
+ fieldTypes.enableTermVectorOffsets(FIELD_NAME);
+ fieldTypes.enableTermVectorPayloads(FIELD_NAME);
+ }
for (String text : texts) {
- addDoc(writer, text);
+ writer.addDocument(doc(writer, FIELD_NAME, text));
}
+
+ // a few tests need other docs...:
Document doc = writer.newDocument();
doc.addInt(NUMERIC_FIELD_NAME, 1);
writer.addDocument(doc);
@@ -1960,6 +2024,8 @@ public class HighlighterTest extends Bas
writer.forceMerge(1);
writer.close();
reader = DirectoryReader.open(ramDir);
+
+ //Misc:
numHighlights = 0;
}
@@ -1970,11 +2036,6 @@ public class HighlighterTest extends Bas
ramDir.close();
super.tearDown();
}
- private void addDoc(IndexWriter writer, String text) throws IOException {
- Document d = writer.newDocument();
- d.addLargeText(FIELD_NAME, text);
- writer.addDocument(d);
- }
private static Token createToken(String term, int start, int offset)
{
@@ -2152,11 +2213,13 @@ final class SynonymTokenizer extends Tok
throws Exception {
for (int i = 0; i < hits.totalHits; i++) {
- String text = searcher.doc(hits.scoreDocs[i].doc).getString(HighlighterTest.FIELD_NAME);
+ Document doc = searcher.doc(hits.scoreDocs[i].doc);
+ String text = doc.getString(HighlighterTest.FIELD_NAME);
int maxNumFragmentsRequired = 2;
String fragmentSeparator = "...";
Scorer scorer = null;
- TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, text);
+ TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(),
+ hits.scoreDocs[i].doc, HighlighterTest.FIELD_NAME, doc, analyzer);
if (mode == QUERY) {
scorer = new QueryScorer(query);
} else if (mode == QUERY_TERM) {
@@ -2164,7 +2227,6 @@ final class SynonymTokenizer extends Tok
}
Highlighter highlighter = new Highlighter(formatter, scorer);
highlighter.setTextFragmenter(frag);
-
String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
fragmentSeparator);
if (LuceneTestCase.VERBOSE) System.out.println("\t" + result);
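Two themes run through this test diff: the recurring swap from analyzer.tokenStream(...) to TokenSources.getAnyTokenStream(...), so the randomized term-vector setup in setUp() exercises both the term-vector and re-analysis paths, and the new testPayloadQuery, which shows payload-sensitive highlighting once term vectors carry payloads. A condensed sketch of the query side (names come from the test; MockPayloadAnalyzer writes "pos: N" payloads):

    // Only spans whose payload equals "pos: 1" match, so of the two
    // "words" occurrences only the first one gets highlighted.
    SpanQuery words = new SpanTermQuery(new Term(FIELD_NAME, "words"));
    Query query = new SpanPayloadCheckQuery(words,
        Collections.singleton("pos: 1".getBytes(StandardCharsets.UTF_8)));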
Modified: lucene/dev/branches/lucene6005/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/lucene6005/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Sun Dec 7 11:37:32 2014
@@ -18,13 +18,12 @@ package org.apache.lucene.index.memory;
*/
import java.io.IOException;
-import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
-import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
-import java.util.NoSuchElementException;
+import java.util.SortedMap;
+import java.util.TreeMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@@ -61,17 +60,16 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.IntBlockPool;
import org.apache.lucene.util.IntBlockPool.SliceReader;
import org.apache.lucene.util.IntBlockPool.SliceWriter;
-import org.apache.lucene.util.IntBlockPool;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.RecyclingByteBlockAllocator;
import org.apache.lucene.util.RecyclingIntBlockAllocator;
-
/**
* High-performance single-document main memory Apache Lucene fulltext search index.
* <p>
@@ -190,10 +188,7 @@ import org.apache.lucene.util.RecyclingI
public class MemoryIndex {
/** info for each field: Map<String fieldName, Info field> */
- private final HashMap<String,Info> fields = new HashMap<>();
-
- /** fields sorted ascending by fieldName; lazily computed on demand */
- private transient Map.Entry<String,Info>[] sortedFields;
+ private final SortedMap<String,Info> fields = new TreeMap<>();
private final boolean storeOffsets;
@@ -203,29 +198,12 @@ public class MemoryIndex {
private final IntBlockPool intBlockPool;
// private final IntBlockPool.SliceReader postingsReader;
private final IntBlockPool.SliceWriter postingsWriter;
-
- private HashMap<String,FieldInfo> fieldInfos = new HashMap<>();
private Counter bytesUsed;
private boolean frozen = false;
private Similarity normSimilarity = IndexSearcher.getDefaultSimilarity();
-
- /**
- * Sorts term entries into ascending order; also works for
- * Arrays.binarySearch() and Arrays.sort()
- */
- private static final Comparator<Object> termComparator = new Comparator<Object>() {
- @Override
- @SuppressWarnings({"unchecked","rawtypes"})
- public int compare(Object o1, Object o2) {
- if (o1 instanceof Map.Entry<?,?>) o1 = ((Map.Entry<?,?>) o1).getKey();
- if (o2 instanceof Map.Entry<?,?>) o2 = ((Map.Entry<?,?>) o2).getKey();
- if (o1 == o2) return 0;
- return ((Comparable) o1).compareTo((Comparable) o2);
- }
- };
/**
* Constructs an empty instance.
@@ -247,7 +225,6 @@ public class MemoryIndex {
*/
public MemoryIndex(boolean storeOffsets) {
this(storeOffsets, 0);
-
}
/**
@@ -296,7 +273,7 @@ public class MemoryIndex {
addField(fieldName, stream, 1.0f, analyzer.getPositionIncrementGap(fieldName), analyzer.getOffsetGap(fieldName));
}
-
+
/**
* Convenience method; Creates and returns a token stream that generates a
* token for each keyword in the given collection, "as is", without any
@@ -429,10 +406,12 @@ public class MemoryIndex {
int pos = -1;
final BytesRefHash terms;
final SliceByteStartArray sliceArray;
- Info info = null;
+ Info info;
long sumTotalTermFreq = 0;
int offset = 0;
+ FieldInfo fieldInfo;
if ((info = fields.get(fieldName)) != null) {
+ fieldInfo = info.fieldInfo;
numTokens = info.numTokens;
numOverlapTokens = info.numOverlapTokens;
pos = info.lastPosition + positionIncrementGap;
@@ -442,16 +421,13 @@ public class MemoryIndex {
sliceArray = info.sliceArray;
sumTotalTermFreq = info.sumTotalTermFreq;
} else {
+ fieldInfo = new FieldInfo(fieldName, fields.size(), false, false, false,
+ this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
+ DocValuesType.NONE, -1, null);
sliceArray = new SliceByteStartArray(BytesRefHash.DEFAULT_CAPACITY);
terms = new BytesRefHash(byteBlockPool, BytesRefHash.DEFAULT_CAPACITY, sliceArray);
}
- if (!fieldInfos.containsKey(fieldName)) {
- fieldInfos.put(fieldName,
- new FieldInfo(fieldName, fieldInfos.size(), false, false, false,
- this.storeOffsets ? IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
- DocValuesType.NONE, -1, null));
- }
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
@@ -488,8 +464,7 @@ public class MemoryIndex {
// ensure infos.numTokens > 0 invariant; needed for correct operation of terms()
if (numTokens > 0) {
- fields.put(fieldName, new Info(terms, sliceArray, numTokens, numOverlapTokens, boost, pos, offsetAtt.endOffset() + offset, sumTotalTermFreq));
- sortedFields = null; // invalidate sorted view, if any
+ fields.put(fieldName, new Info(fieldInfo, terms, sliceArray, numTokens, numOverlapTokens, boost, pos, offsetAtt.endOffset() + offset, sumTotalTermFreq));
}
} catch (Exception e) { // can never happen
throw new RuntimeException(e);
@@ -510,7 +485,13 @@ public class MemoryIndex {
public void setSimilarity(Similarity similarity) {
if (frozen)
throw new IllegalArgumentException("Cannot set Similarity when MemoryIndex is frozen");
+ if (this.normSimilarity == similarity)
+ return;
this.normSimilarity = similarity;
+ //invalidate any cached norms that may exist
+ for (Info info : fields.values()) {
+ info.norms = null;
+ }
}
/**
@@ -528,17 +509,16 @@ public class MemoryIndex {
/**
* Prepares the MemoryIndex for querying in a non-lazy way.
- *
+ * <p>
* After calling this you can query the MemoryIndex from multiple threads, but you
* cannot subsequently add new data.
*/
public void freeze() {
this.frozen = true;
- sortFields();
- for (Map.Entry<String,Info> info : sortedFields) {
- info.getValue().sortTerms();
+ for (Info info : fields.values()) {
+ info.sortTerms();
+ info.getNormDocValues();//lazily computed
}
- calculateNormValues();
}
/**
@@ -589,7 +569,7 @@ public class MemoryIndex {
* NOT close the index reader!!! This avoids all sorts of
* unnecessary baggage and locking in the Lucene IndexReader
* superclass, all of which is completely unnecessary for this main
- * memory index data structure without thread-safety claims.
+ * memory index data structure.
*
* Wishing IndexReader would be an interface...
*
@@ -600,26 +580,6 @@ public class MemoryIndex {
}
}
- /** sorts into ascending order (on demand), reusing memory along the way */
- private void sortFields() {
- if (sortedFields == null) sortedFields = sort(fields);
- }
-
- /** returns a view of the given map's entries, sorted ascending by key */
- private static <K,V> Map.Entry<K,V>[] sort(HashMap<K,V> map) {
- int size = map.size();
- @SuppressWarnings("unchecked")
- Map.Entry<K,V>[] entries = new Map.Entry[size];
-
- Iterator<Map.Entry<K,V>> iter = map.entrySet().iterator();
- for (int i=0; i < size; i++) {
- entries[i] = iter.next();
- }
-
- if (size > 1) ArrayUtil.introSort(entries, termComparator);
- return entries;
- }
-
/**
* Returns a String representation of the index data for debugging purposes.
*
@@ -627,13 +587,11 @@ public class MemoryIndex {
*/
@Override
public String toString() {
- StringBuilder result = new StringBuilder(256);
- sortFields();
+ StringBuilder result = new StringBuilder(256);
int sumPositions = 0;
int sumTerms = 0;
final BytesRef spare = new BytesRef();
- for (int i=0; i < sortedFields.length; i++) {
- Map.Entry<String,Info> entry = sortedFields[i];
+ for (Map.Entry<String, Info> entry : fields.entrySet()) {
String fieldName = entry.getKey();
Info info = entry.getValue();
info.sortTerms();
@@ -641,20 +599,20 @@ public class MemoryIndex {
SliceByteStartArray sliceArray = info.sliceArray;
int numPositions = 0;
SliceReader postingsReader = new SliceReader(intBlockPool);
- for (int j=0; j < info.terms.size(); j++) {
+ for (int j = 0; j < info.terms.size(); j++) {
int ord = info.sortedTerms[j];
info.terms.get(ord, spare);
int freq = sliceArray.freq[ord];
result.append("\t'" + spare + "':" + freq + ":");
postingsReader.reset(sliceArray.start[ord], sliceArray.end[ord]);
result.append(" [");
- final int iters = storeOffsets ? 3 : 1;
- while(!postingsReader.endOfSlice()) {
+ final int iters = storeOffsets ? 3 : 1;
+ while (!postingsReader.endOfSlice()) {
result.append("(");
-
+
for (int k = 0; k < iters; k++) {
result.append(postingsReader.readInt());
- if (k < iters-1) {
+ if (k < iters - 1) {
result.append(", ");
}
}
@@ -662,13 +620,13 @@ public class MemoryIndex {
if (!postingsReader.endOfSlice()) {
result.append(",");
}
-
+
}
result.append("]");
result.append("\n");
numPositions += freq;
}
-
+
result.append("\tterms=" + info.terms.size());
result.append(", positions=" + numPositions);
result.append("\n");
@@ -676,26 +634,31 @@ public class MemoryIndex {
sumTerms += info.terms.size();
}
- result.append("\nfields=" + sortedFields.length);
+ result.append("\nfields=" + fields.size());
result.append(", terms=" + sumTerms);
result.append(", positions=" + sumPositions);
return result.toString();
}
/**
- * Index data structure for a field; Contains the tokenized term texts and
+ * Index data structure for a field; contains the tokenized term texts and
* their positions.
*/
- private static final class Info {
-
+ private final class Info {
+
+ private final FieldInfo fieldInfo;
+
+ /** The norms for this field; computed on demand. */
+ private transient NumericDocValues norms;
+
/**
* Term strings and their positions for this field: Map <String
* termText, ArrayIntList positions>
*/
- private final BytesRefHash terms;
+ private final BytesRefHash terms; // note unfortunate variable name clash with the Terms type
private final SliceByteStartArray sliceArray;
-
+
/** Terms sorted ascending by term text; computed on demand */
private transient int[] sortedTerms;
@@ -716,7 +679,8 @@ public class MemoryIndex {
/** the last offset encountered in this field for multi field support*/
private final int lastOffset;
- public Info(BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, int lastOffset, long sumTotalTermFreq) {
+ public Info(FieldInfo fieldInfo, BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, int lastOffset, long sumTotalTermFreq) {
+ this.fieldInfo = fieldInfo;
this.terms = terms;
this.sliceArray = sliceArray;
this.numTokens = numTokens;
@@ -727,10 +691,6 @@ public class MemoryIndex {
this.lastOffset = lastOffset;
}
- public long getSumTotalTermFreq() {
- return sumTotalTermFreq;
- }
-
/**
* Sorts hashed terms into ascending order, reusing memory along the
* way. Note that sorting is lazily delayed until required (often it's
@@ -740,12 +700,30 @@ public class MemoryIndex {
* apart from more sophisticated Tries / prefix trees).
*/
public void sortTerms() {
- if (sortedTerms == null)
+ if (sortedTerms == null) {
sortedTerms = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
+ }
}
-
- public float getBoost() {
- return boost;
+
+ public NumericDocValues getNormDocValues() {
+ if (norms == null) {
+ FieldInvertState invertState = new FieldInvertState(fieldInfo.name, fieldInfo.number,
+ numTokens, numOverlapTokens, 0, boost);
+ final long value = normSimilarity.computeNorm(invertState);
+ if (DEBUG) System.err.println("MemoryIndexReader.norms: " + fieldInfo.name + ":" + value + ":" + numTokens);
+ norms = new NumericDocValues() {
+
+ @Override
+ public long get(int docID) {
+ if (docID != 0)
+ throw new IndexOutOfBoundsException();
+ else
+ return value;
+ }
+
+ };
+ }
+ return norms;
}
}
@@ -776,10 +754,6 @@ public class MemoryIndex {
private Info getInfo(String fieldName) {
return fields.get(fieldName);
}
-
- private Info getInfo(int pos) {
- return sortedFields[pos].getValue();
- }
@Override
public Bits getLiveDocs() {
@@ -788,7 +762,12 @@ public class MemoryIndex {
@Override
public FieldInfos getFieldInfos() {
- return new FieldInfos(fieldInfos.values().toArray(new FieldInfo[fieldInfos.size()]));
+ FieldInfo[] fieldInfos = new FieldInfo[fields.size()];
+ int i = 0;
+ for (Info info : fields.values()) {
+ fieldInfos[i++] = info.fieldInfo;
+ }
+ return new FieldInfos(fieldInfos);
}
@Override
@@ -829,98 +808,72 @@ public class MemoryIndex {
private class MemoryFields extends Fields {
@Override
public Iterator<String> iterator() {
- return new Iterator<String>() {
- int upto = -1;
+ return fields.keySet().iterator();
+ }
+
+ @Override
+ public Terms terms(final String field) {
+ final Info info = fields.get(field);
+ if (info == null)
+ return null;
+ return new Terms() {
@Override
- public String next() {
- upto++;
- if (upto >= sortedFields.length) {
- throw new NoSuchElementException();
- }
- return sortedFields[upto].getKey();
+ public TermsEnum iterator(TermsEnum reuse) {
+ return new MemoryTermsEnum(info);
}
@Override
- public boolean hasNext() {
- return upto+1 < sortedFields.length;
+ public long size() {
+ return info.terms.size();
}
@Override
- public void remove() {
- throw new UnsupportedOperationException();
+ public long getSumTotalTermFreq() {
+ return info.sumTotalTermFreq;
}
- };
- }
-
- @Override
- public Terms terms(final String field) {
- int i = Arrays.binarySearch(sortedFields, field, termComparator);
- if (i < 0) {
- return null;
- } else {
- final Info info = getInfo(i);
- info.sortTerms();
-
- return new Terms() {
- @Override
- public TermsEnum iterator(TermsEnum reuse) {
- return new MemoryTermsEnum(info);
- }
-
- @Override
- public long size() {
- return info.terms.size();
- }
- @Override
- public long getSumTotalTermFreq() {
- return info.getSumTotalTermFreq();
- }
+ @Override
+ public long getSumDocFreq() {
+ // each term has df=1
+ return info.terms.size();
+ }
- @Override
- public long getSumDocFreq() {
- // each term has df=1
- return info.terms.size();
- }
+ @Override
+ public int getDocCount() {
+ return size() > 0 ? 1 : 0;
+ }
- @Override
- public int getDocCount() {
- return info.terms.size() > 0 ? 1 : 0;
- }
+ @Override
+ public boolean hasFreqs() {
+ return true;
+ }
- @Override
- public boolean hasFreqs() {
- return true;
- }
+ @Override
+ public boolean hasOffsets() {
+ return storeOffsets;
+ }
- @Override
- public boolean hasOffsets() {
- return storeOffsets;
- }
+ @Override
+ public boolean hasPositions() {
+ return true;
+ }
- @Override
- public boolean hasPositions() {
- return true;
- }
-
- @Override
- public boolean hasPayloads() {
- return false;
- }
- };
- }
+ @Override
+ public boolean hasPayloads() {
+ return false;
+ }
+ };
}
@Override
public int size() {
- return sortedFields.length;
+ return fields.size();
}
}
@Override
public Fields fields() {
- sortFields();
return new MemoryFields();
}
@@ -1209,44 +1162,20 @@ public class MemoryIndex {
@Override
public NumericDocValues getNormValues(String field) {
- if (norms == null)
- return calculateFieldNormValue(field);
- return norms.get(field);
+ Info info = fields.get(field);
+ if (info == null) {
+ return null;
+ }
+ return info.getNormDocValues();
}
}
- private Map<String, NumericDocValues> norms = null;
-
- private NumericDocValues calculateFieldNormValue(String field) {
- FieldInfo fieldInfo = fieldInfos.get(field);
- if (fieldInfo == null)
- return null;
- Info info = fields.get(field);
- int numTokens = info != null ? info.numTokens : 0;
- int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
- float boost = info != null ? info.getBoost() : 1.0f;
- FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
- long value = normSimilarity.computeNorm(invertState);
- if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
- return new MemoryIndexNormDocValues(value);
- }
-
- private void calculateNormValues() {
- norms = new HashMap<>();
- for (String field : fieldInfos.keySet()) {
- norms.put(field, calculateFieldNormValue(field));
- }
- }
-
/**
* Resets the {@link MemoryIndex} to its initial state and recycles all internal buffers.
*/
public void reset() {
- this.fieldInfos.clear();
- this.fields.clear();
- this.sortedFields = null;
- this.norms = null;
+ fields.clear();
this.normSimilarity = IndexSearcher.getDefaultSimilarity();
byteBlockPool.reset(false, false); // no need to 0-fill the buffers
intBlockPool.reset(true, false); // here we must 0-fill since we use slices
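For context, a minimal sketch of reading back the per-field stats the rewritten Terms implementation above exposes; the field name, text, and analyzer are illustrative, and only the MemoryIndex/LeafReader calls visible in this diff are assumed:

  MemoryIndex mi = new MemoryIndex();
  mi.addField("body", "quick brown fox", new MockAnalyzer(random()));
  LeafReader reader = (LeafReader) mi.createSearcher().getIndexReader();
  Terms terms = reader.terms("body");
  // every term in a MemoryIndex lives in the single virtual document, so
  // sumDocFreq equals the unique term count and docCount is at most 1
  assertEquals(terms.size(), terms.getSumDocFreq());
  assertEquals(1, terms.getDocCount());
  mi.reset(); // clears all fields and recycles the internal buffers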
Modified: lucene/dev/branches/lucene6005/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java (original)
+++ lucene/dev/branches/lucene6005/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java Sun Dec 7 11:37:32 2014
@@ -100,7 +100,7 @@ public class TestMemoryIndex extends Luc
LeafReader reader = (LeafReader) searcher.getIndexReader();
float n1 = reader.getNormValues("f1").get(0);
- // Norms aren't cached, so we can change the Similarity
+ // Norms are re-computed when we change the Similarity
mi.setSimilarity(new DefaultSimilarity() {
@Override
public float lengthNorm(FieldInvertState state) {
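A sketch of the behavior the updated comment describes, assuming the test setup above; because norms are now derived from the field's Info on demand, installing a different Similarity changes what a subsequent read returns:

  float n1 = reader.getNormValues("f1").get(0);
  mi.setSimilarity(new DefaultSimilarity() {
    @Override
    public float lengthNorm(FieldInvertState state) {
      return 10f; // arbitrary constant, only to force a different norm
    }
  });
  // re-read: the norm is re-computed under the new Similarity rather than
  // served from a cache, so it no longer matches n1
  float n2 = ((LeafReader) mi.createSearcher().getIndexReader())
      .getNormValues("f1").get(0);
  assertTrue(n1 != n2);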
Modified: lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/index/IndexSplitter.java Sun Dec 7 11:37:32 2014
@@ -24,6 +24,7 @@ import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
import java.util.List;
import java.util.Locale;
@@ -137,7 +138,7 @@ public class IndexSplitter {
SegmentInfo info = infoPerCommit.info;
// Same info just changing the dir:
SegmentInfo newInfo = new SegmentInfo(destFSDir, info.getVersion(), info.name, info.getDocCount(),
- info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId());
+ info.getUseCompoundFile(), info.getCodec(), info.getDiagnostics(), info.getId(), new HashMap<>());
destInfos.add(new SegmentCommitInfo(newInfo, infoPerCommit.getDelCount(),
infoPerCommit.getDelGen(), infoPerCommit.getFieldInfosGen(),
infoPerCommit.getDocValuesGen()));
Modified: lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java (original)
+++ lucene/dev/branches/lucene6005/lucene/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java Sun Dec 7 11:37:32 2014
@@ -166,6 +166,7 @@ public class NativeUnixDirectory extends
private boolean isOpen;
public NativeUnixIndexOutput(Path path, int bufferSize) throws IOException {
+ super("NativeUnixIndexOutput(path=\"" + path.toString() + "\")");
//this.path = path;
final FileDescriptor fd = NativePosixUtil.open_direct(path.toString(), false);
fos = new FileOutputStream(fd);
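For illustration, a hypothetical minimal IndexOutput subclass showing where that resource description ends up; the class is invented for this sketch, but the overridden methods are the ones IndexOutput requires at this revision, and the super(...) string is what toString() and error messages report:

  import java.util.Arrays;
  import java.util.zip.CRC32;
  import org.apache.lucene.store.IndexOutput;

  // Hypothetical heap-backed IndexOutput, for the resourceDescription pattern only.
  class ByteArrayIndexOutput extends IndexOutput {
    private byte[] buf = new byte[16];
    private int len;
    private final CRC32 crc = new CRC32();

    ByteArrayIndexOutput(String name) {
      super("ByteArrayIndexOutput(name=\"" + name + "\")");
    }

    @Override
    public void writeByte(byte b) {
      if (len == buf.length) {
        buf = Arrays.copyOf(buf, 2 * len);
      }
      buf[len++] = b;
      crc.update(b);
    }

    @Override
    public void writeBytes(byte[] b, int offset, int length) {
      for (int i = 0; i < length; i++) {
        writeByte(b[offset + i]);
      }
    }

    @Override
    public long getFilePointer() { return len; }

    @Override
    public long getChecksum() { return crc.getValue(); }

    @Override
    public void close() { /* nothing to release for a heap buffer */ }
  }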
Modified: lucene/dev/branches/lucene6005/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java (original)
+++ lucene/dev/branches/lucene6005/lucene/queries/src/java/org/apache/lucene/queries/TermsFilter.java Sun Dec 7 11:37:32 2014
@@ -33,10 +33,12 @@ import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.RamUsageEstimator;
/**
* Constructs a filter for docs matching any of the terms added to this class.
@@ -45,7 +47,9 @@ import org.apache.lucene.util.BytesRef;
* a choice of "category" labels picked by the end user. As a filter, this is much faster than the
* equivalent query (a BooleanQuery with many "should" TermQueries)
*/
-public final class TermsFilter extends Filter {
+public final class TermsFilter extends Filter implements Accountable {
+
+ private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(TermsFilter.class);
/*
* this class is often used for large number of terms in a single field.
@@ -178,7 +182,14 @@ public final class TermsFilter extends F
this.hashCode = hash;
}
-
+
+ @Override
+ public long ramBytesUsed() {
+ return BASE_RAM_BYTES_USED
+ + RamUsageEstimator.sizeOf(termsAndFields)
+ + RamUsageEstimator.sizeOf(termsBytes)
+ + RamUsageEstimator.sizeOf(offsets);
+ }
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
@@ -254,7 +265,13 @@ public final class TermsFilter extends F
return builder.toString();
}
- private static final class TermsAndField {
+ private static final class TermsAndField implements Accountable {
+
+ private static final long BASE_RAM_BYTES_USED =
+ RamUsageEstimator.shallowSizeOfInstance(TermsAndField.class)
+ + RamUsageEstimator.shallowSizeOfInstance(String.class)
+ + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // header of the array held by the String
+
final int start;
final int end;
final String field;
@@ -268,6 +285,13 @@ public final class TermsFilter extends F
}
@Override
+ public long ramBytesUsed() {
+ // this is an approximation since we don't actually know how strings store
+ // their data, which can be JVM-dependent
+ return BASE_RAM_BYTES_USED + field.length() * RamUsageEstimator.NUM_BYTES_CHAR;
+ }
+
+ @Override
public int hashCode() {
final int prime = 31;
int result = 1;
@@ -317,4 +341,5 @@ public final class TermsFilter extends F
Collections.sort(toSort);
return toSort;
}
+
}
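With TermsFilter now implementing Accountable, callers such as filter caches can budget memory against its estimate. A self-contained sketch; the demo class name and printed text are invented, while the TermsFilter and Term calls are real:

  import java.util.Arrays;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.queries.TermsFilter;

  public class TermsFilterRamDemo {
    public static void main(String[] args) {
      TermsFilter filter = new TermsFilter(Arrays.asList(
          new Term("category", "a"), new Term("category", "b")));
      // ramBytesUsed() is an estimate (note the String caveat in the diff
      // above), good enough for cache accounting, not an exact measurement
      System.out.println("TermsFilter ~= " + filter.ramBytesUsed() + " bytes");
    }
  }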
Modified: lucene/dev/branches/lucene6005/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/queries/src/test/org/apache/lucene/queries/TermsFilterTest.java Sun Dec 7 11:37:32 2014
@@ -48,8 +48,11 @@ import org.apache.lucene.store.Directory
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.RamUsageTester;
import org.apache.lucene.util.TestUtil;
+import com.carrotsearch.randomizedtesting.generators.RandomStrings;
+
public class TermsFilterTest extends LuceneTestCase {
public void testCachability() throws Exception {
@@ -334,4 +337,18 @@ public class TermsFilterTest extends Luc
new Term("field1", "c"));
assertEquals("field1:a field1:b field1:c", termsFilter.toString());
}
+
+ public void testRamBytesUsed() {
+ List<Term> terms = new ArrayList<>();
+ final int numTerms = 1000 + random().nextInt(1000);
+ for (int i = 0; i < numTerms; ++i) {
+ terms.add(new Term("f", RandomStrings.randomUnicodeOfLength(random(), 10)));
+ }
+ TermsFilter filter = new TermsFilter(terms);
+ final long actualRamBytesUsed = RamUsageTester.sizeOf(filter);
+ final long expectedRamBytesUsed = filter.ramBytesUsed();
+ // error margin within 1%
+ assertEquals(actualRamBytesUsed, expectedRamBytesUsed, actualRamBytesUsed / 100);
+ }
+
}
Modified: lucene/dev/branches/lucene6005/lucene/sandbox/src/test/org/apache/lucene/codecs/idversion/TestIDVersionPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/sandbox/src/test/org/apache/lucene/codecs/idversion/TestIDVersionPostingsFormat.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/sandbox/src/test/org/apache/lucene/codecs/idversion/TestIDVersionPostingsFormat.java (original)
+++ lucene/dev/branches/lucene6005/lucene/sandbox/src/test/org/apache/lucene/codecs/idversion/TestIDVersionPostingsFormat.java Sun Dec 7 11:37:32 2014
@@ -50,6 +50,7 @@ import org.apache.lucene.search.IndexSea
import org.apache.lucene.search.LiveFieldValues;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
+import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
@@ -633,8 +634,21 @@ public class TestIDVersionPostingsFormat
} catch (IllegalArgumentException iae) {
// expected
}
+ try {
+ w.addDocument(doc);
+ fail("should have hit exc");
+ } catch (AlreadyClosedException ace) {
+ // expected
+ }
+ dir.close();
+ }
- doc = w.newDocument();
+ public void testInvalidVersions2() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
+ Document doc = w.newDocument();
// Long.MAX_VALUE:
doc.addLargeText("id", makeIDTokenStream("id", new BytesRef(new byte[] {(byte)0x7f, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff, (byte)0xff})));
try {
@@ -644,7 +658,12 @@ public class TestIDVersionPostingsFormat
} catch (IllegalArgumentException iae) {
// expected
}
- w.close();
+ try {
+ w.addDocument(doc);
+ fail("should have hit exc");
+ } catch (AlreadyClosedException ace) {
+ // expected
+ }
dir.close();
}
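The added catch blocks in both tests encode the same reasoning: the IllegalArgumentException raised for an invalid version token leaves the IndexWriter closed, so any later use must surface AlreadyClosedException. Distilled as a sketch, with doc and w as in the tests above:

  try {
    w.addDocument(doc);          // doc carries an invalid version token
    fail("should have hit exc");
  } catch (IllegalArgumentException iae) {
    // expected: the postings format rejects the version
  }
  try {
    w.addDocument(doc);          // the writer closed itself after the failure
    fail("should have hit exc");
  } catch (AlreadyClosedException ace) {
    // expected: further use of a closed writer
  }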
Modified: lucene/dev/branches/lucene6005/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java (original)
+++ lucene/dev/branches/lucene6005/lucene/spatial/src/java/org/apache/lucene/spatial/prefix/tree/NumberRangePrefixTree.java Sun Dec 7 11:37:32 2014
@@ -232,20 +232,34 @@ public abstract class NumberRangePrefixT
public Shape toRangeShape(Shape start, Shape end) {
if (!(start instanceof LevelledValue && end instanceof LevelledValue))
throw new IllegalArgumentException("Must pass "+LevelledValue.class+" but got "+start.getClass());
- LevelledValue minLV = (LevelledValue) start;
- LevelledValue maxLV = (LevelledValue) end;
- if (minLV.equals(maxLV))
- return minLV;
- //Optimize precision of the range, e.g. April 1st to April 30th is April.
- minLV = minLV.getLVAtLevel(truncateStartVals(minLV, 0));
- maxLV = maxLV.getLVAtLevel(truncateEndVals(maxLV, 0));
- int cmp = comparePrefixLV(minLV, maxLV);
+ LevelledValue startLV = (LevelledValue) start;
+ LevelledValue endLV = (LevelledValue) end;
+ //note: this normalization/optimization process is actually REQUIRED based on assumptions elsewhere.
+ //Normalize start & end
+ startLV = startLV.getLVAtLevel(truncateStartVals(startLV, 0)); // chops off trailing min-vals (zeroes)
+ endLV = endLV.getLVAtLevel(truncateEndVals(endLV, 0)); // chops off trailing max-vals
+ //Optimize to just start or end if it's equivalent, e.g. April to April 1st is April 1st.
+ int cmp = comparePrefixLV(startLV, endLV);
if (cmp > 0) {
throw new IllegalArgumentException("Wrong order: "+start+" TO "+end);
}
- if (cmp == 0 && minLV.getLevel() == maxLV.getLevel())
- return minLV;
- return new NRShape(minLV, maxLV);
+ if (cmp == 0) {//one is a prefix of the other
+ if (startLV.getLevel() == endLV.getLevel()) {
+ //same
+ return startLV;
+ } else if (endLV.getLevel() > startLV.getLevel()) {
+ // e.g. April to April 1st
+ if (truncateStartVals(endLV, startLV.getLevel()) == startLV.getLevel()) {
+ return endLV;
+ }
+ } else {//startLV level > endLV level
+ // e.g. April 30 to April
+ if (truncateEndVals(startLV, endLV.getLevel()) == endLV.getLevel()) {
+ return startLV;
+ }
+ }
+ }
+ return new NRShape(startLV, endLV);
}
/** From lv.getLevel on up, it returns the first Level seen with val != 0. It doesn't check past endLevel. */
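To make the new prefix-equivalence branch concrete, a sketch using DateRangePrefixTree; parseShape and the printed forms match the test below, while the INSTANCE singleton and demo class are assumptions:

  import java.text.ParseException;
  import org.apache.lucene.spatial.prefix.tree.DateRangePrefixTree;

  public class RangeCollapseDemo {
    public static void main(String[] args) throws ParseException {
      DateRangePrefixTree tree = DateRangePrefixTree.INSTANCE;
      // 2014 is a prefix of 2014-01 (cmp == 0), and January is 2014's minimal
      // sub-interval, so the range collapses to the more precise endpoint:
      System.out.println(tree.parseShape("[2014 TO 2014-01]")); // 2014-01
      // symmetrically, December is 2014's maximal sub-interval:
      System.out.println(tree.parseShape("[2014-12 TO 2014]")); // 2014-12
    }
  }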
Modified: lucene/dev/branches/lucene6005/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/DateNRStrategyTest.java Sun Dec 7 11:37:32 2014
@@ -27,6 +27,7 @@ import org.junit.Ignore;
import org.junit.Test;
import java.io.IOException;
+import java.text.ParseException;
import java.util.Calendar;
public class DateNRStrategyTest extends RandomSpatialOpStrategyTestCase {
@@ -65,12 +66,6 @@ public class DateNRStrategyTest extends
testOperationRandomShapes(SpatialOperation.Contains);
}
- @Test @Ignore("see LUCENE-5692")
- @Repeat(iterations = ITERATIONS)
- public void testDisjoint() throws IOException {
- testOperationRandomShapes(SpatialOperation.IsDisjointTo);
- }
-
@Test
public void testWithinSame() throws IOException {
final Calendar cal = tree.newCal();
Modified: lucene/dev/branches/lucene6005/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java (original)
+++ lucene/dev/branches/lucene6005/lucene/spatial/src/test/org/apache/lucene/spatial/prefix/tree/DateRangePrefixTreeTest.java Sun Dec 7 11:37:32 2014
@@ -157,7 +157,9 @@ public class DateRangePrefixTreeTest ext
assertEquals("2014", tree.parseShape("[2014-01-01 TO 2014-12-31]").toString());
- assertEquals("2014", tree.parseShape("[2014-01 TO 2014]").toString());
+ assertEquals("2014", tree.parseShape("[2014-01 TO 2014]").toString());
+ assertEquals("2014-01", tree.parseShape("[2014 TO 2014-01]").toString());
+ assertEquals("2014-12", tree.parseShape("[2014-12 TO 2014]").toString());
assertEquals("[2014 TO 2014-04-06]", tree.parseShape("[2014-01 TO 2014-04-06]").toString());
Modified: lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java (original)
+++ lucene/dev/branches/lucene6005/lucene/suggest/src/java/org/apache/lucene/search/spell/SpellChecker.java Sun Dec 7 11:37:32 2014
@@ -496,6 +496,7 @@ public class SpellChecker implements jav
fieldTypes.setMultiValued("start" + ng);
fieldTypes.disableStored("gram" + ng);
+ fieldTypes.disableFastRanges("gram" + ng);
fieldTypes.setIndexOptions("gram" + ng, IndexOptions.DOCS_AND_FREQS);
fieldTypes.disableSorting("gram" + ng);
fieldTypes.setMultiValued("gram" + ng);
Modified: lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java (original)
+++ lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingStoredFieldsFormat.java Sun Dec 7 11:37:32 2014
@@ -143,11 +143,6 @@ public class AssertingStoredFieldsFormat
}
@Override
- public void abort() {
- in.abort();
- }
-
- @Override
public void finish(FieldInfos fis, int numDocs) throws IOException {
assert docStatus == (numDocs > 0 ? Status.FINISHED : Status.UNDEFINED);
in.finish(fis, numDocs);
Modified: lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java (original)
+++ lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingTermVectorsFormat.java Sun Dec 7 11:37:32 2014
@@ -195,11 +195,6 @@ public class AssertingTermVectorsFormat
}
@Override
- public void abort() {
- in.abort();
- }
-
- @Override
public void finish(FieldInfos fis, int numDocs) throws IOException {
assert docCount == numDocs;
assert docStatus == (numDocs > 0 ? Status.FINISHED : Status.UNDEFINED);
Modified: lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java (original)
+++ lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/CompressingCodec.java Sun Dec 7 11:37:32 2014
@@ -36,16 +36,16 @@ public abstract class CompressingCodec e
/**
* Create a random instance.
*/
- public static CompressingCodec randomInstance(Random random, int chunkSize, boolean withSegmentSuffix) {
+ public static CompressingCodec randomInstance(Random random, int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix) {
switch (random.nextInt(4)) {
case 0:
- return new FastCompressingCodec(chunkSize, withSegmentSuffix);
+ return new FastCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix);
case 1:
- return new FastDecompressionCompressingCodec(chunkSize, withSegmentSuffix);
+ return new FastDecompressionCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix);
case 2:
- return new HighCompressionCompressingCodec(chunkSize, withSegmentSuffix);
+ return new HighCompressionCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix);
case 3:
- return new DummyCompressingCodec(chunkSize, withSegmentSuffix);
+ return new DummyCompressingCodec(chunkSize, maxDocsPerChunk, withSegmentSuffix);
default:
throw new AssertionError();
}
@@ -56,14 +56,14 @@ public abstract class CompressingCodec e
* suffix
*/
public static CompressingCodec randomInstance(Random random) {
- return randomInstance(random, RandomInts.randomIntBetween(random, 1, 500), false);
+ return randomInstance(random, RandomInts.randomIntBetween(random, 1, 1 << 15), RandomInts.randomIntBetween(random, 64, 1024), false);
}
/**
* Creates a random {@link CompressingCodec} that is using a segment suffix
*/
public static CompressingCodec randomInstance(Random random, boolean withSegmentSuffix) {
- return randomInstance(random, RandomInts.randomIntBetween(random, 1, 500), withSegmentSuffix);
+ return randomInstance(random, RandomInts.randomIntBetween(random, 1, 1 << 15), RandomInts.randomIntBetween(random, 64, 1024), withSegmentSuffix);
}
private final CompressingStoredFieldsFormat storedFieldsFormat;
@@ -72,17 +72,17 @@ public abstract class CompressingCodec e
/**
* Creates a compressing codec with a given segment suffix
*/
- public CompressingCodec(String name, String segmentSuffix, CompressionMode compressionMode, int chunkSize) {
+ public CompressingCodec(String name, String segmentSuffix, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk) {
super(name, TestUtil.getDefaultCodec());
- this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize);
+ this.storedFieldsFormat = new CompressingStoredFieldsFormat(name, segmentSuffix, compressionMode, chunkSize, maxDocsPerChunk);
this.termVectorsFormat = new CompressingTermVectorsFormat(name, segmentSuffix, compressionMode, chunkSize);
}
/**
* Creates a compressing codec with an empty segment suffix
*/
- public CompressingCodec(String name, CompressionMode compressionMode, int chunkSize) {
- this(name, "", compressionMode, chunkSize);
+ public CompressingCodec(String name, CompressionMode compressionMode, int chunkSize, int maxDocsPerChunk) {
+ this(name, "", compressionMode, chunkSize, maxDocsPerChunk);
}
@Override
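A short sketch of the new knob in action; the constructor shape is taken from this diff and the values are only illustrative. A chunk of stored fields is flushed once either limit is reached: chunkSize buffered bytes, or maxDocsPerChunk buffered documents.

  CompressingCodec codec = new FastCompressingCodec(
      1 << 14,  // chunkSize, in bytes
      128,      // maxDocsPerChunk
      false);   // no segment suffix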
Modified: lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java (original)
+++ lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastCompressingCodec.java Sun Dec 7 11:37:32 2014
@@ -21,14 +21,14 @@ package org.apache.lucene.codecs.compres
public class FastCompressingCodec extends CompressingCodec {
/** Constructor that allows configuring the chunk size and the maximum number of documents per chunk. */
- public FastCompressingCodec(int chunkSize, boolean withSegmentSuffix) {
+ public FastCompressingCodec(int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix) {
super("FastCompressingStoredFields",
withSegmentSuffix ? "FastCompressingStoredFields" : "",
- CompressionMode.FAST, chunkSize);
+ CompressionMode.FAST, chunkSize, maxDocsPerChunk);
}
/** Default constructor. */
public FastCompressingCodec() {
- this(1 << 14, false);
+ this(1 << 14, 128, false);
}
}
Modified: lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java?rev=1643662&r1=1643661&r2=1643662&view=diff
==============================================================================
--- lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java (original)
+++ lucene/dev/branches/lucene6005/lucene/test-framework/src/java/org/apache/lucene/codecs/compressing/FastDecompressionCompressingCodec.java Sun Dec 7 11:37:32 2014
@@ -21,14 +21,14 @@ package org.apache.lucene.codecs.compres
public class FastDecompressionCompressingCodec extends CompressingCodec {
/** Constructor that allows configuring the chunk size and the maximum number of documents per chunk. */
- public FastDecompressionCompressingCodec(int chunkSize, boolean withSegmentSuffix) {
+ public FastDecompressionCompressingCodec(int chunkSize, int maxDocsPerChunk, boolean withSegmentSuffix) {
super("FastDecompressionCompressingStoredFields",
withSegmentSuffix ? "FastDecompressionCompressingStoredFields" : "",
- CompressionMode.FAST_DECOMPRESSION, chunkSize);
+ CompressionMode.FAST_DECOMPRESSION, chunkSize, maxDocsPerChunk);
}
/** Default constructor. */
public FastDecompressionCompressingCodec() {
- this(1 << 14, false);
+ this(1 << 14, 256, false);
}
}