You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2014/09/29 12:21:12 UTC

svn commit: r1628154 - in /lucene/dev/trunk/lucene: ./ memory/src/java/org/apache/lucene/index/memory/ memory/src/test/org/apache/lucene/index/memory/

Author: romseygeek
Date: Mon Sep 29 10:21:12 2014
New Revision: 1628154

URL: http://svn.apache.org/r1628154
Log:
LUCENE-5911: Add MemoryIndex.freeze() to allow for thread-safe searching

Added:
    lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java   (with props)
    lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java
      - copied, changed from r1628145, lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
Removed:
    lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1628154&r1=1628153&r2=1628154&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Sep 29 10:21:12 2014
@@ -70,6 +70,9 @@ New Features
   exiting requests that take too long to enumerate over terms. (Anshum Gupta, Steve Rowe,
   Robert Muir)
 
+* LUCENE-5911: Add MemoryIndex.freeze() to allow thread-safe searching over a 
+  MemoryIndex. (Alan Woodward)
+
 API Changes
 
 * LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and

Modified: lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1628154&r1=1628153&r2=1628154&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Mon Sep 29 10:21:12 2014
@@ -17,15 +17,6 @@ package org.apache.lucene.index.memory;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Comparator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NoSuchElementException;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -36,8 +27,8 @@ import org.apache.lucene.index.LeafReade
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.Fields;
@@ -55,21 +46,30 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.SimpleCollector;
 import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.store.RAMDirectory; // for javadocs
+import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.ByteBlockPool;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
 import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
 import org.apache.lucene.util.Counter;
+import org.apache.lucene.util.IntBlockPool;
 import org.apache.lucene.util.IntBlockPool.SliceReader;
 import org.apache.lucene.util.IntBlockPool.SliceWriter;
-import org.apache.lucene.util.IntBlockPool;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.RecyclingByteBlockAllocator;
 import org.apache.lucene.util.RecyclingIntBlockAllocator;
 
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+
 
 /**
  * High-performance single-document main memory Apache Lucene fulltext search index. 
@@ -153,18 +153,12 @@ import org.apache.lucene.util.RecyclingI
  * </pre>
  * 
  * 
- * <h4>No thread safety guarantees</h4>
- * 
- * An instance can be queried multiple times with the same or different queries,
- * but an instance is not thread-safe. If desired use idioms such as:
- * <pre class="prettyprint">
- * MemoryIndex index = ...
- * synchronized (index) {
- *    // read and/or write index (i.e. add fields and/or query)
- * } 
- * </pre>
- * 
- * 
+ * <h4>Thread safety guarantees</h4>
+ *
+ * MemoryIndex is not normally thread-safe for adds or queries.  However, queries
+ * are thread-safe after {@code freeze()} has been called.
+ *
+ *
  * <h4>Performance Notes</h4>
  * 
  * Internally there's a new data structure geared towards efficient indexing 
@@ -212,6 +206,8 @@ public class MemoryIndex {
   private HashMap<String,FieldInfo> fieldInfos = new HashMap<>();
 
   private Counter bytesUsed;
+
+  private boolean frozen = false;
   
   /**
    * Sorts term entries into ascending order; also works for
@@ -417,6 +413,8 @@ public class MemoryIndex {
    */
   public void addField(String fieldName, TokenStream stream, float boost, int positionIncrementGap, int offsetGap) {
     try {
+      if (frozen)
+        throw new IllegalArgumentException("Cannot call addField() when MemoryIndex is frozen");
       if (fieldName == null)
         throw new IllegalArgumentException("fieldName must not be null");
       if (stream == null)
@@ -513,6 +511,20 @@ public class MemoryIndex {
     reader.setSearcher(searcher); // to later get hold of searcher.getSimilarity()
     return searcher;
   }
+
+  /**
+   * Prepares the MemoryIndex for querying by sorting all fields and their terms up front.
+   *
+   * After calling this you can query the MemoryIndex from multiple threads, but you
+   * cannot add new data until {@code reset()} has been called.
+   */
+  public void freeze() {
+    this.frozen = true;
+    sortFields();
+    for (Map.Entry<String,Info> info : sortedFields) {
+      info.getValue().sortTerms();
+    }
+  }
   
   /**
    * Convenience method that efficiently returns the relevance score by
@@ -684,10 +696,10 @@ public class MemoryIndex {
     private final long sumTotalTermFreq;
 
     /** the last position encountered in this field for multi field support*/
-    private int lastPosition;
+    private final int lastPosition;
 
     /** the last offset encountered in this field for multi field support*/
-    private int lastOffset;
+    private final int lastOffset;
 
     public Info(BytesRefHash terms, SliceByteStartArray sliceArray, int numTokens, int numOverlapTokens, float boost, int lastPosition, int lastOffset, long sumTotalTermFreq) {
       this.terms = terms;
@@ -1229,6 +1241,7 @@ public class MemoryIndex {
     this.sortedFields = null;
     byteBlockPool.reset(false, false); // no need to 0-fill the buffers
     intBlockPool.reset(true, false); // here must must 0-fill since we use slices
+    this.frozen = false;
   }
   
   private static final class SliceByteStartArray extends DirectBytesStartArray {

Added: lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java?rev=1628154&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java (added)
+++ lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndex.java Mon Sep 29 10:21:12 2014
@@ -0,0 +1,76 @@
+package org.apache.lucene.index.memory;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Before;
+import org.junit.Test;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.CoreMatchers.not;
+import static org.junit.internal.matchers.StringContains.containsString;
+
+public class TestMemoryIndex extends LuceneTestCase {
+
+  private MockAnalyzer analyzer;
+
+  @Before
+  public void setup() {
+    analyzer = new MockAnalyzer(random());
+    analyzer.setEnableChecks(false);    // MemoryIndex can close a TokenStream on init error
+  }
+
+  @Test
+  public void testFreezeAPI() {
+
+    MemoryIndex mi = new MemoryIndex();
+    mi.addField("f1", "some text", analyzer);
+
+    assertThat(mi.search(new MatchAllDocsQuery()), not(is(0.0f)));
+    assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));
+
+    // check we can add a new field after searching
+    mi.addField("f2", "some more text", analyzer);
+    assertThat(mi.search(new TermQuery(new Term("f2", "some"))), not(is(0.0f)));
+
+    // freeze!
+    mi.freeze();
+
+    try {
+      mi.addField("f3", "and yet more", analyzer);
+      fail("Expected an IllegalArgumentException when adding a field after calling freeze()");
+    }
+    catch (RuntimeException e) {
+      assertThat(e.getMessage(), containsString("frozen"));
+    }
+
+    assertThat(mi.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));
+
+    mi.reset();
+    mi.addField("f1", "wibble", analyzer);
+    assertThat(mi.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
+    assertThat(mi.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));
+
+  }
+
+
+}

Copied: lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java (from r1628145, lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java?p2=lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java&p1=lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java&r1=1628145&r2=1628154&rev=1628154&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (original)
+++ lucene/dev/trunk/lucene/memory/src/test/org/apache/lucene/index/memory/TestMemoryIndexAgainstRAMDir.java Mon Sep 29 10:21:12 2014
@@ -81,7 +81,7 @@ import static org.hamcrest.CoreMatchers.
  * Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour,
  * returning the same results for queries on some randomish indexes.
  */
-public class MemoryIndexTest extends BaseTokenStreamTestCase {
+public class TestMemoryIndexAgainstRAMDir extends BaseTokenStreamTestCase {
   private Set<String> queries = new HashSet<>();
   
   public static final int ITERATIONS = 100 * RANDOM_MULTIPLIER;