You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2014/10/21 11:16:55 UTC
svn commit: r1633322 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/memory/ lucene/memory/src/java/org/apache/lucene/index/memory/
lucene/memory/src/test/org/apache/lucene/index/memory/
Author: romseygeek
Date: Tue Oct 21 09:16:55 2014
New Revision: 1633322
URL: http://svn.apache.org/r1633322
Log:
LUCENE-5911: Remove caching of norms, calculate up front in freeze()
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/memory/ (props changed)
lucene/dev/branches/branch_5x/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1633322&r1=1633321&r2=1633322&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Tue Oct 21 09:16:55 2014
@@ -57,7 +57,7 @@ New Features
Robert Muir)
* LUCENE-5911: Add MemoryIndex.freeze() to allow thread-safe searching over a
- MemoryIndex. (Alan Woodward)
+ MemoryIndex. (Alan Woodward, David Smiley, Robert Muir)
* LUCENE-5969: Lucene 5.0 has a new index format with mismatched file detection,
improved exception handling, and indirect norms encoding for sparse fields.
Modified: lucene/dev/branches/branch_5x/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1633322&r1=1633321&r2=1633322&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/branch_5x/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Tue Oct 21 09:16:55 2014
@@ -208,6 +208,8 @@ public class MemoryIndex {
private Counter bytesUsed;
private boolean frozen = false;
+
+ private Similarity normSimilarity = IndexSearcher.getDefaultSimilarity();
/**
* Sorts term entries into ascending order; also works for
@@ -500,6 +502,15 @@ public class MemoryIndex {
}
/**
+ * Set the Similarity to be used for calculating field norms
+ */
+ public void setSimilarity(Similarity similarity) {
+ if (frozen)
+ throw new IllegalArgumentException("Cannot set Similarity when MemoryIndex is frozen");
+ this.normSimilarity = similarity;
+ }
+
+ /**
* Creates and returns a searcher that can be used to execute arbitrary
* Lucene queries and to collect the resulting query results as hits.
*
@@ -508,7 +519,7 @@ public class MemoryIndex {
public IndexSearcher createSearcher() {
MemoryIndexReader reader = new MemoryIndexReader();
IndexSearcher searcher = new IndexSearcher(reader); // ensures no auto-close !!
- reader.setSearcher(searcher); // to later get hold of searcher.getSimilarity()
+ searcher.setSimilarity(normSimilarity);
return searcher;
}
@@ -524,6 +535,7 @@ public class MemoryIndex {
for (Map.Entry<String,Info> info : sortedFields) {
info.getValue().sortTerms();
}
+ calculateNormValues();
}
/**
@@ -744,8 +756,6 @@ public class MemoryIndex {
*/
private final class MemoryIndexReader extends LeafReader {
- private IndexSearcher searcher; // needed to find searcher.getSimilarity()
-
private MemoryIndexReader() {
super(); // avoid as much superclass baggage as possible
}
@@ -1169,15 +1179,6 @@ public class MemoryIndex {
return null;
}
}
-
- private Similarity getSimilarity() {
- if (searcher != null) return searcher.getSimilarity();
- return IndexSearcher.getDefaultSimilarity();
- }
-
- private void setSearcher(IndexSearcher searcher) {
- this.searcher = searcher;
- }
@Override
public int numDocs() {
@@ -1202,33 +1203,35 @@ public class MemoryIndex {
if (DEBUG) System.err.println("MemoryIndexReader.doClose");
}
- /** performance hack: cache norms to avoid repeated expensive calculations */
- private NumericDocValues cachedNormValues;
- private String cachedFieldName;
- private Similarity cachedSimilarity;
-
@Override
public NumericDocValues getNormValues(String field) {
- FieldInfo fieldInfo = fieldInfos.get(field);
- if (fieldInfo == null || fieldInfo.omitsNorms())
- return null;
- NumericDocValues norms = cachedNormValues;
- Similarity sim = getSimilarity();
- if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached?
- Info info = getInfo(field);
- int numTokens = info != null ? info.numTokens : 0;
- int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
- float boost = info != null ? info.getBoost() : 1.0f;
- FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
- long value = sim.computeNorm(invertState);
- norms = new MemoryIndexNormDocValues(value);
- // cache it for future reuse
- cachedNormValues = norms;
- cachedFieldName = field;
- cachedSimilarity = sim;
- if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
- }
- return norms;
+ if (norms == null)
+ return calculateFieldNormValue(field);
+ return norms.get(field);
+ }
+
+ }
+
+ private Map<String, NumericDocValues> norms = null;
+
+ private NumericDocValues calculateFieldNormValue(String field) {
+ FieldInfo fieldInfo = fieldInfos.get(field);
+ if (fieldInfo == null)
+ return null;
+ Info info = fields.get(field);
+ int numTokens = info != null ? info.numTokens : 0;
+ int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
+ float boost = info != null ? info.getBoost() : 1.0f;
+ FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
+ long value = normSimilarity.computeNorm(invertState);
+ if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
+ return new MemoryIndexNormDocValues(value);
+ }
+
+ private void calculateNormValues() {
+ norms = new HashMap<>();
+ for (String field : fieldInfos.keySet()) {
+ norms.put(field, calculateFieldNormValue(field));
}
}
@@ -1239,6 +1242,8 @@ public class MemoryIndex {
this.fieldInfos.clear();
this.fields.clear();
this.sortedFields = null;
+ this.norms = null;
+ this.normSimilarity = IndexSearcher.getDefaultSimilarity();
byteBlockPool.reset(false, false); // no need to 0-fill the buffers
intBlockPool.reset(true, false); // here must must 0-fill since we use slices
this.frozen = false;
Modified: lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java?rev=1633322&r1=1633321&r2=1633322&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java (original)
+++ lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java Tue Oct 21 09:16:55 2014
@@ -18,13 +18,20 @@ package org.apache.lucene.index.memory;
*/
import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Before;
import org.junit.Test;
+import java.io.IOException;
+
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.CoreMatchers.not;
import static org.junit.internal.matchers.StringContains.containsString;
@@ -63,6 +70,14 @@ public class TestMemoryIndex extends Luc
assertThat(e.getMessage(), containsString("frozen"));
}
+ try {
+ mi.setSimilarity(new BM25Similarity(1, 1));
+ fail("Expected an IllegalArgumentException when setting the Similarity after calling freeze()");
+ }
+ catch (RuntimeException e) {
+ assertThat(e.getMessage(), containsString("frozen"));
+ }
+
assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));
mi.reset();
@@ -70,6 +85,32 @@ public class TestMemoryIndex extends Luc
assertThat(mi.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
assertThat(mi.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));
+ // check we can set the Similarity again
+ mi.setSimilarity(new DefaultSimilarity());
+
+ }
+
+ @Test
+ public void testSimilarities() throws IOException {
+
+ MemoryIndex mi = new MemoryIndex();
+ mi.addField("f1", "a long text field that contains many many terms", analyzer);
+
+ IndexSearcher searcher = mi.createSearcher();
+ LeafReader reader = (LeafReader) searcher.getIndexReader();
+ float n1 = reader.getNormValues("f1").get(0);
+
+ // Norms aren't cached, so we can change the Similarity
+ mi.setSimilarity(new DefaultSimilarity() {
+ @Override
+ public float lengthNorm(FieldInvertState state) {
+ return 74;
+ }
+ });
+ float n2 = reader.getNormValues("f1").get(0);
+
+ assertTrue(n1 != n2);
+
}
Modified: lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java?rev=1633322&r1=1633321&r2=1633322&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java (original)
+++ lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java Tue Oct 21 09:16:55 2014
@@ -17,14 +17,6 @@ package org.apache.lucene.index.memory;
* limitations under the License.
*/
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.util.HashSet;
-import java.util.Set;
-
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CannedTokenStream;
@@ -40,7 +32,6 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.CompositeReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
@@ -50,6 +41,7 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
@@ -67,14 +59,22 @@ import org.apache.lucene.search.spans.Sp
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.ByteBlockPool.Allocator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.RecyclingByteBlockAllocator;
import org.apache.lucene.util.TestUtil;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Set;
+
import static org.hamcrest.CoreMatchers.equalTo;
/**