You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2014/10/21 11:16:55 UTC

svn commit: r1633322 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/memory/ lucene/memory/src/java/org/apache/lucene/index/memory/ lucene/memory/src/test/org/apache/lucene/index/memory/

Author: romseygeek
Date: Tue Oct 21 09:16:55 2014
New Revision: 1633322

URL: http://svn.apache.org/r1633322
Log:
LUCENE-5911: Remove cacheing of norms, calculate up front in freeze()

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/memory/   (props changed)
    lucene/dev/branches/branch_5x/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
    lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1633322&r1=1633321&r2=1633322&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Tue Oct 21 09:16:55 2014
@@ -57,7 +57,7 @@ New Features
   Robert Muir)
 
 * LUCENE-5911: Add MemoryIndex.freeze() to allow thread-safe searching over a 
-  MemoryIndex. (Alan Woodward)
+  MemoryIndex. (Alan Woodward, David Smiley, Robert Muir)
 
 * LUCENE-5969: Lucene 5.0 has a new index format with mismatched file detection,
   improved exception handling, and indirect norms encoding for sparse fields.

Modified: lucene/dev/branches/branch_5x/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1633322&r1=1633321&r2=1633322&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/branch_5x/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Tue Oct 21 09:16:55 2014
@@ -208,6 +208,8 @@ public class MemoryIndex {
   private Counter bytesUsed;
 
   private boolean frozen = false;
+
+  private Similarity normSimilarity = IndexSearcher.getDefaultSimilarity();
   
   /**
    * Sorts term entries into ascending order; also works for
@@ -500,6 +502,15 @@ public class MemoryIndex {
   }
 
   /**
+   * Set the Similarity to be used for calculating field norms
+   */
+  public void setSimilarity(Similarity similarity) {
+    if (frozen)
+      throw new IllegalArgumentException("Cannot set Similarity when MemoryIndex is frozen");
+    this.normSimilarity = similarity;
+  }
+
+  /**
    * Creates and returns a searcher that can be used to execute arbitrary
    * Lucene queries and to collect the resulting query results as hits.
    * 
@@ -508,7 +519,7 @@ public class MemoryIndex {
   public IndexSearcher createSearcher() {
     MemoryIndexReader reader = new MemoryIndexReader();
     IndexSearcher searcher = new IndexSearcher(reader); // ensures no auto-close !!
-    reader.setSearcher(searcher); // to later get hold of searcher.getSimilarity()
+    searcher.setSimilarity(normSimilarity);
     return searcher;
   }
 
@@ -524,6 +535,7 @@ public class MemoryIndex {
     for (Map.Entry<String,Info> info : sortedFields) {
       info.getValue().sortTerms();
     }
+    calculateNormValues();
   }
   
   /**
@@ -744,8 +756,6 @@ public class MemoryIndex {
    */
   private final class MemoryIndexReader extends LeafReader {
     
-    private IndexSearcher searcher; // needed to find searcher.getSimilarity() 
-    
     private MemoryIndexReader() {
       super(); // avoid as much superclass baggage as possible
     }
@@ -1169,15 +1179,6 @@ public class MemoryIndex {
         return null;
       }
     }
-
-    private Similarity getSimilarity() {
-      if (searcher != null) return searcher.getSimilarity();
-      return IndexSearcher.getDefaultSimilarity();
-    }
-    
-    private void setSearcher(IndexSearcher searcher) {
-      this.searcher = searcher;
-    }
   
     @Override
     public int numDocs() {
@@ -1202,33 +1203,35 @@ public class MemoryIndex {
       if (DEBUG) System.err.println("MemoryIndexReader.doClose");
     }
     
-    /** performance hack: cache norms to avoid repeated expensive calculations */
-    private NumericDocValues cachedNormValues;
-    private String cachedFieldName;
-    private Similarity cachedSimilarity;
-    
     @Override
     public NumericDocValues getNormValues(String field) {
-      FieldInfo fieldInfo = fieldInfos.get(field);
-      if (fieldInfo == null || fieldInfo.omitsNorms())
-        return null;
-      NumericDocValues norms = cachedNormValues;
-      Similarity sim = getSimilarity();
-      if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached?
-        Info info = getInfo(field);
-        int numTokens = info != null ? info.numTokens : 0;
-        int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
-        float boost = info != null ? info.getBoost() : 1.0f; 
-        FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
-        long value = sim.computeNorm(invertState);
-        norms = new MemoryIndexNormDocValues(value);
-        // cache it for future reuse
-        cachedNormValues = norms;
-        cachedFieldName = field;
-        cachedSimilarity = sim;
-        if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
-      }
-      return norms;
+      if (norms == null)
+        return calculateFieldNormValue(field);
+      return norms.get(field);
+    }
+
+  }
+
+  private Map<String, NumericDocValues> norms = null;
+
+  private NumericDocValues calculateFieldNormValue(String field) {
+    FieldInfo fieldInfo = fieldInfos.get(field);
+    if (fieldInfo == null)
+      return null;
+    Info info = fields.get(field);
+    int numTokens = info != null ? info.numTokens : 0;
+    int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
+    float boost = info != null ? info.getBoost() : 1.0f;
+    FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
+    long value = normSimilarity.computeNorm(invertState);
+    if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
+    return new MemoryIndexNormDocValues(value);
+  }
+
+  private void calculateNormValues() {
+    norms = new HashMap<>();
+    for (String field : fieldInfos.keySet()) {
+      norms.put(field, calculateFieldNormValue(field));
     }
   }
   
@@ -1239,6 +1242,8 @@ public class MemoryIndex {
     this.fieldInfos.clear();
     this.fields.clear();
     this.sortedFields = null;
+    this.norms = null;
+    this.normSimilarity = IndexSearcher.getDefaultSimilarity();
     byteBlockPool.reset(false, false); // no need to 0-fill the buffers
     intBlockPool.reset(true, false); // here must must 0-fill since we use slices
     this.frozen = false;

Modified: lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java?rev=1633322&r1=1633321&r2=1633322&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java (original)
+++ lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java Tue Oct 21 09:16:55 2014
@@ -18,13 +18,20 @@ package org.apache.lucene.index.memory;
  */
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.similarities.BM25Similarity;
+import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.util.LuceneTestCase;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.io.IOException;
+
 import static org.hamcrest.CoreMatchers.is;
 import static org.hamcrest.CoreMatchers.not;
 import static org.junit.internal.matchers.StringContains.containsString;
@@ -63,6 +70,14 @@ public class TestMemoryIndex extends Luc
       assertThat(e.getMessage(), containsString("frozen"));
     }
 
+    try {
+      mi.setSimilarity(new BM25Similarity(1, 1));
+      fail("Expected an IllegalArgumentException when setting the Similarity after calling freeze()");
+    }
+    catch (RuntimeException e) {
+      assertThat(e.getMessage(), containsString("frozen"));
+    }
+
     assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));
 
     mi.reset();
@@ -70,6 +85,32 @@ public class TestMemoryIndex extends Luc
     assertThat(mi.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
     assertThat(mi.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));
 
+    // check we can set the Similarity again
+    mi.setSimilarity(new DefaultSimilarity());
+
+  }
+
+  @Test
+  public void testSimilarities() throws IOException {
+
+    MemoryIndex mi = new MemoryIndex();
+    mi.addField("f1", "a long text field that contains many many terms", analyzer);
+
+    IndexSearcher searcher = mi.createSearcher();
+    LeafReader reader = (LeafReader) searcher.getIndexReader();
+    float n1 = reader.getNormValues("f1").get(0);
+
+    // Norms aren't cached, so we can change the Similarity
+    mi.setSimilarity(new DefaultSimilarity() {
+      @Override
+      public float lengthNorm(FieldInvertState state) {
+        return 74;
+      }
+    });
+    float n2 = reader.getNormValues("f1").get(0);
+
+    assertTrue(n1 != n2);
+
   }
 
 

Modified: lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java?rev=1633322&r1=1633321&r2=1633322&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java (original)
+++ lucene/dev/branches/branch_5x/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java Tue Oct 21 09:16:55 2014
@@ -17,14 +17,6 @@ package org.apache.lucene.index.memory;
  * limitations under the License.
  */
 
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.nio.charset.StandardCharsets;
-import java.util.HashSet;
-import java.util.Set;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CannedTokenStream;
@@ -40,7 +32,6 @@ import org.apache.lucene.document.Docume
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.CompositeReader;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocsAndPositionsEnum;
@@ -50,6 +41,7 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.Term;
@@ -67,14 +59,22 @@ import org.apache.lucene.search.spans.Sp
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.ByteBlockPool.Allocator;
 import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.ByteBlockPool.Allocator;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.RecyclingByteBlockAllocator;
 import org.apache.lucene.util.TestUtil;
 
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.HashSet;
+import java.util.Set;
+
 import static org.hamcrest.CoreMatchers.equalTo;
 
 /**