You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2014/10/24 15:45:22 UTC

svn commit: r1634054 - in /lucene/dev/branches/lucene_solr_4_10: ./ lucene/ lucene/memory/ lucene/memory/src/java/org/apache/lucene/index/memory/ lucene/memory/src/test/org/apache/lucene/index/memory/

Author: romseygeek
Date: Fri Oct 24 13:45:21 2014
New Revision: 1634054

URL: http://svn.apache.org/r1634054
Log:
LUCENE-5911: Revert backport

Added:
    lucene/dev/branches/lucene_solr_4_10/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
      - copied unchanged from r1634033, lucene/dev/branches/lucene_solr_4_10/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
Removed:
    lucene/dev/branches/lucene_solr_4_10/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
    lucene/dev/branches/lucene_solr_4_10/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java
Modified:
    lucene/dev/branches/lucene_solr_4_10/   (props changed)
    lucene/dev/branches/lucene_solr_4_10/lucene/   (props changed)
    lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene_solr_4_10/lucene/memory/   (props changed)
    lucene/dev/branches/lucene_solr_4_10/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java

Modified: lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt?rev=1634054&r1=1634053&r2=1634054&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt Fri Oct 24 13:45:21 2014
@@ -5,11 +5,6 @@ http://s.apache.org/luceneversions
 
 ======================= Lucene 4.10.2 ======================
 
-New Features
-
-* LUCENE-5911: Add MemoryIndex.freeze() to allow thread-safe searching over a 
-  MemoryIndex. (Alan Woodward, David Smiley, Robert Muir)
-
 Bug fixes
 
 * LUCENE-5977: Fix tokenstream safety checks in IndexWriter to properly

Modified: lucene/dev/branches/lucene_solr_4_10/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1634054&r1=1634053&r2=1634054&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Fri Oct 24 13:45:21 2014
@@ -17,6 +17,15 @@ package org.apache.lucene.index.memory;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -28,8 +37,8 @@ import org.apache.lucene.index.AtomicRea
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.Fields;
@@ -47,30 +56,21 @@ import org.apache.lucene.search.IndexSea
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.store.RAMDirectory; // for javadocs
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
+import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.IntBlockPool;
 import org.apache.lucene.util.IntBlockPool.SliceReader;
 import org.apache.lucene.util.IntBlockPool.SliceWriter;
+import org.apache.lucene.util.IntBlockPool;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.RecyclingByteBlockAllocator;
 import org.apache.lucene.util.RecyclingIntBlockAllocator;
 
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NoSuchElementException;
-
 
 /**
  * High-performance single-document main memory Apache Lucene fulltext search index. 
@@ -154,12 +154,18 @@ import java.util.NoSuchElementException;
  * </pre>
  * 
  * 
- * <h4>Thread safety guarantees</h4>
- *
- * MemoryIndex is not normally thread-safe for adds or queries.  However, queries
- * are thread-safe after {@code freeze()} has been called.
- *
- *
+ * <h4>No thread safety guarantees</h4>
+ * 
+ * An instance can be queried multiple times with the same or different queries,
+ * but an instance is not thread-safe. If desired use idioms such as:
+ * <pre class="prettyprint">
+ * MemoryIndex index = ...
+ * synchronized (index) {
+ *    // read and/or write index (i.e. add fields and/or query)
+ * } 
+ * </pre>
+ * 
+ * 
  * <h4>Performance Notes</h4>
  * 
  * Internally there's a new data structure geared towards efficient indexing 
@@ -207,10 +213,6 @@ public class MemoryIndex {
   private HashMap<String,FieldInfo> fieldInfos = new HashMap<>();
 
   private Counter bytesUsed;
-
-  private boolean frozen = false;
-
-  private Similarity normSimilarity = IndexSearcher.getDefaultSimilarity();
   
   /**
    * Sorts term entries into ascending order; also works for
@@ -416,8 +418,6 @@ public class MemoryIndex {
    */
   public void addField(String fieldName, TokenStream stream, float boost, int positionIncrementGap, int offsetGap) {
     try {
-      if (frozen)
-        throw new IllegalArgumentException("Cannot call addField() when MemoryIndex is frozen");
       if (fieldName == null)
         throw new IllegalArgumentException("fieldName must not be null");
       if (stream == null)
@@ -503,15 +503,6 @@ public class MemoryIndex {
   }
 
   /**
-   * Set the Similarity to be used for calculating field norms
-   */
-  public void setSimilarity(Similarity similarity) {
-    if (frozen)
-      throw new IllegalArgumentException("Cannot set Similarity when MemoryIndex is frozen");
-    this.normSimilarity = similarity;
-  }
-
-  /**
    * Creates and returns a searcher that can be used to execute arbitrary
    * Lucene queries and to collect the resulting query results as hits.
    * 
@@ -520,24 +511,9 @@ public class MemoryIndex {
   public IndexSearcher createSearcher() {
     MemoryIndexReader reader = new MemoryIndexReader();
     IndexSearcher searcher = new IndexSearcher(reader); // ensures no auto-close !!
-    searcher.setSimilarity(normSimilarity);
+    reader.setSearcher(searcher); // to later get hold of searcher.getSimilarity()
     return searcher;
   }
-
-  /**
-   * Prepares the MemoryIndex for querying in a non-lazy way.
-   *
-   * After calling this you can query the MemoryIndex from multiple threads, but you
-   * cannot subsequently add new data.
-   */
-  public void freeze() {
-    this.frozen = true;
-    sortFields();
-    for (Map.Entry<String,Info> info : sortedFields) {
-      info.getValue().sortTerms();
-    }
-    calculateNormValues();
-  }
   
   /**
    * Convenience method that efficiently returns the relevance score by
@@ -711,10 +687,10 @@ public class MemoryIndex {
     private final long sumTotalTermFreq;
 
     /** the last position encountered in this field for multi field support*/
-    private final int lastPosition;
+    private int lastPosition;
 
     /** the last offset encountered in this field for multi field support*/
-    private final int lastOffset;
+    private int lastOffset;
 
     public Info(BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, int lastOffset, long sumTotalTermFreq) {
       this.terms = terms;
@@ -759,6 +735,8 @@ public class MemoryIndex {
    */
   private final class MemoryIndexReader extends AtomicReader {
     
+    private IndexSearcher searcher; // needed to find searcher.getSimilarity() 
+    
     private MemoryIndexReader() {
       super(); // avoid as much superclass baggage as possible
     }
@@ -1192,6 +1170,15 @@ public class MemoryIndex {
         return null;
       }
     }
+
+    private Similarity getSimilarity() {
+      if (searcher != null) return searcher.getSimilarity();
+      return IndexSearcher.getDefaultSimilarity();
+    }
+    
+    private void setSearcher(IndexSearcher searcher) {
+      this.searcher = searcher;
+    }
   
     @Override
     public int numDocs() {
@@ -1216,35 +1203,33 @@ public class MemoryIndex {
       if (DEBUG) System.err.println("MemoryIndexReader.doClose");
     }
     
+    /** performance hack: cache norms to avoid repeated expensive calculations */
+    private NumericDocValues cachedNormValues;
+    private String cachedFieldName;
+    private Similarity cachedSimilarity;
+    
     @Override
     public NumericDocValues getNormValues(String field) {
-      if (norms == null)
-        return calculateFieldNormValue(field);
-      return norms.get(field);
-    }
-
-  }
-
-  private Map<String, NumericDocValues> norms = null;
-
-  private NumericDocValues calculateFieldNormValue(String field) {
-    FieldInfo fieldInfo = fieldInfos.get(field);
-    if (fieldInfo == null)
-      return null;
-    Info info = fields.get(field);
-    int numTokens = info != null ? info.numTokens : 0;
-    int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
-    float boost = info != null ? info.getBoost() : 1.0f;
-    FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
-    long value = normSimilarity.computeNorm(invertState);
-    if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
-    return new MemoryIndexNormDocValues(value);
-  }
-
-  private void calculateNormValues() {
-    norms = new HashMap<>();
-    for (String field : fieldInfos.keySet()) {
-      norms.put(field, calculateFieldNormValue(field));
+      FieldInfo fieldInfo = fieldInfos.get(field);
+      if (fieldInfo == null || fieldInfo.omitsNorms())
+        return null;
+      NumericDocValues norms = cachedNormValues;
+      Similarity sim = getSimilarity();
+      if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached?
+        Info info = getInfo(field);
+        int numTokens = info != null ? info.numTokens : 0;
+        int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
+        float boost = info != null ? info.getBoost() : 1.0f; 
+        FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
+        long value = sim.computeNorm(invertState);
+        norms = new MemoryIndexNormDocValues(value);
+        // cache it for future reuse
+        cachedNormValues = norms;
+        cachedFieldName = field;
+        cachedSimilarity = sim;
+        if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
+      }
+      return norms;
     }
   }
   
@@ -1255,11 +1240,8 @@ public class MemoryIndex {
     this.fieldInfos.clear();
     this.fields.clear();
     this.sortedFields = null;
-    this.norms = null;
-    this.normSimilarity = IndexSearcher.getDefaultSimilarity();
     byteBlockPool.reset(false, false); // no need to 0-fill the buffers
     intBlockPool.reset(true, false); // here must must 0-fill since we use slices
-    this.frozen = false;
   }
   
   private static final class SliceByteStartArray extends DirectBytesStartArray {