You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/01/25 15:38:43 UTC

svn commit: r737513 - in /lucene/java/trunk: ./ contrib/memory/src/java/org/apache/lucene/index/memory/ contrib/memory/src/test/org/apache/lucene/index/memory/ src/java/org/apache/lucene/index/ src/java/org/apache/lucene/search/ src/test/org/apache/luc...

Author: mikemccand
Date: Sun Jan 25 14:38:43 2009
New Revision: 737513

URL: http://svn.apache.org/viewvc?rev=737513&view=rev
Log:
LUCENE-1316: don't call synchronized IndexReader.isDeleted when scoring MatchAllDocsQuery

Added:
    lucene/java/trunk/src/java/org/apache/lucene/index/AllTermDocs.java   (with props)
Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
    lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
    lucene/java/trunk/src/java/org/apache/lucene/index/FilterIndexReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/MultiSegmentReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermDocs.java
    lucene/java/trunk/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestFilterIndexReader.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestMultiSegmentReader.java
    lucene/java/trunk/src/test/org/apache/lucene/index/TestParallelReader.java
    lucene/java/trunk/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Sun Jan 25 14:38:43 2009
@@ -145,6 +145,13 @@
  4. LUCENE-1224: Short circuit FuzzyQuery.rewrite when input token length 
     is small compared to minSimilarity. (Timo Nentwig, Mark Miller)
 
+ 5. LUCENE-1316: MatchAllDocsQuery now avoids the synchronized
+    IndexReader.isDeleted() call per document, by directly accessing
+    the underlying deletedDocs BitVector.  This improves performance
+    with non-readOnly readers, especially in a multi-threaded
+    environment.  (Todd Feak, Yonik Seeley, Jason Rutherglen via Mike
+    McCandless)
+
 Documentation
 
 Build

Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Sun Jan 25 14:38:43 2009
@@ -31,7 +31,6 @@
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
@@ -847,13 +846,19 @@
         private boolean hasNext;
         private int cursor = 0;
         private ArrayIntList current;
+        private Term term;
         
         public void seek(Term term) {
+          this.term = term;
           if (DEBUG) System.err.println(".seek: " + term);
-          Info info = getInfo(term.field());
-          current = info == null ? null : info.getPositions(term.text());
-          hasNext = (current != null);
-          cursor = 0;
+          if (term == null) {
+            hasNext = true;  // term==null means match all docs
+          } else {
+            Info info = getInfo(term.field());
+            current = info == null ? null : info.getPositions(term.text());
+            hasNext = (current != null);
+            cursor = 0;
+          }
         }
   
         public void seek(TermEnum termEnum) {
@@ -867,7 +872,7 @@
         }
   
         public int freq() {
-          int freq = current != null ? numPositions(current) : 0;
+          int freq = current != null ? numPositions(current) : (term == null ? 1 : 0);
           if (DEBUG) System.err.println(".freq: " + freq);
           return freq;
         }

Modified: lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (original)
+++ lucene/java/trunk/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java Sun Jan 25 14:38:43 2009
@@ -51,6 +51,7 @@
 import org.apache.lucene.search.Searcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.index.TermDocs;
 
 /**
 Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour,
@@ -282,7 +283,9 @@
 //        new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN, true, stopWords),        
 //        new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS),
     };
-    
+
+    boolean first = true;
+
     for (int iter=0; iter < iters; iter++) {
       System.out.println("\n########### iteration=" + iter);
       long start = System.currentTimeMillis();            
@@ -306,6 +309,18 @@
               boolean measureIndexing = false; // toggle this to measure query performance
               MemoryIndex memind = null;
               if (useMemIndex && !measureIndexing) memind = createMemoryIndex(doc);
+              
+              if (first) {
+                IndexSearcher s = memind.createSearcher();
+                TermDocs td = s.getIndexReader().termDocs(null);
+                assertTrue(td.next());
+                assertEquals(0, td.doc());
+                assertEquals(1, td.freq());
+                td.close();
+                s.close();
+                first = false;
+              }
+
               RAMDirectory ramind = null;
               if (useRAMIndex && !measureIndexing) ramind = createRAMIndex(doc);
               

Added: lucene/java/trunk/src/java/org/apache/lucene/index/AllTermDocs.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/AllTermDocs.java?rev=737513&view=auto
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/AllTermDocs.java (added)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/AllTermDocs.java Sun Jan 25 14:38:43 2009
@@ -0,0 +1,126 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.index;
+
+import org.apache.lucene.util.BitVector;
+import java.io.IOException;
+
+/**
+ * Enumerates every non-deleted document of a single {@link SegmentReader},
+ * reporting a frequency of 1 for each.  {@link SegmentReader} returns this
+ * from <code>termDocs(null)</code>.
+ *
+ * <p>The deletedDocs reference is captured once at construction, so iterating
+ * does not go through the synchronized <code>IndexReader.isDeleted</code>.
+ */
+class AllTermDocs implements TermDocs {
+  protected BitVector deletedDocs;
+  protected int maxDoc;
+  /** Current document number; -1 until the enumeration is positioned. */
+  protected int doc = -1;
+
+  /**
+   * @param parent the segment whose documents are enumerated
+   */
+  protected AllTermDocs(SegmentReader parent) {
+    // Take the reference while holding the reader's monitor.
+    synchronized (parent) {
+      this.deletedDocs = parent.deletedDocs;
+    }
+    this.maxDoc = parent.maxDoc();
+  }
+
+  /**
+   * Rewinds the enumeration to before the first document.
+   *
+   * @param term must be null; any other value is rejected
+   * @throws UnsupportedOperationException if term is not null
+   */
+  public void seek(Term term) throws IOException {
+    if (term==null) {
+      doc = -1;
+    } else {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  /** Not supported: this enumeration is not driven by a term enumerator. */
+  public void seek(TermEnum termEnum) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+  /** Returns the current document number, or -1 if not yet positioned. */
+  public int doc() {
+    return doc;
+  }
+
+  /** Always 1: every document counts as a single match. */
+  public int freq() {
+    return 1;
+  }
+
+  /** Advances to the next non-deleted document; false when none remain. */
+  public boolean next() throws IOException {
+    return skipTo(doc+1);
+  }
+
+  /**
+   * Bulk-reads document numbers, starting at the current position
+   * (inclusive), into docs and sets the matching freqs entries to 1.
+   *
+   * @return the number of entries filled in
+   */
+  public int read(int[] docs, int[] freqs) throws IOException {
+    if (doc < 0) {
+      // Not positioned yet (new, or just after seek(null)): begin at document 0
+      // instead of testing and emitting the -1 sentinel.
+      doc = 0;
+    }
+    final int length = docs.length;
+    int i = 0;
+    while (i < length && doc < maxDoc) {
+      if (deletedDocs == null || !deletedDocs.get(doc)) {
+        docs[i] = doc;
+        freqs[i] = 1;
+        ++i;
+      }
+      doc++;
+    }
+    return i;
+  }
+
+  /**
+   * Positions on the first non-deleted document numbered target or higher.
+   *
+   * @return false if no such document exists below maxDoc
+   */
+  public boolean skipTo(int target) throws IOException {
+    doc = target;
+    while (doc < maxDoc) {
+      if (deletedDocs == null || !deletedDocs.get(doc)) {
+        return true;
+      }
+      doc++;
+    }
+    return false;
+  }
+
+  /** Nothing to release. */
+  public void close() throws IOException {
+  }
+}

Propchange: lucene/java/trunk/src/java/org/apache/lucene/index/AllTermDocs.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/FilterIndexReader.java Sun Jan 25 14:38:43 2009
@@ -198,6 +198,11 @@
     return in.termDocs();
   }
 
+  public TermDocs termDocs(Term term) throws IOException {
+    ensureOpen();
+    return in.termDocs(term);
+  }
+
   public TermPositions termPositions() throws IOException {
     ensureOpen();
     return in.termPositions();

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java Sun Jan 25 14:38:43 2009
@@ -796,7 +796,9 @@
 
   /** Returns an enumeration of all the documents which contain
    * <code>term</code>. For each document, the document number, the frequency of
-   * the term in that document is also provided, for use in search scoring.
+   * the term in that document is also provided, for use in
+   * search scoring.  If term is null, then all non-deleted
+   * docs are returned with freq=1.
    * Thus, this method implements the mapping:
    * <p><ul>
    * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/MultiSegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/MultiSegmentReader.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/MultiSegmentReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/MultiSegmentReader.java Sun Jan 25 14:38:43 2009
@@ -19,7 +19,6 @@
 
 import java.io.IOException;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -531,7 +530,7 @@
   
       readerTermDocs = new TermDocs[r.length];
     }
-  
+
     public int doc() {
       return base + current.doc();
     }
@@ -601,8 +600,6 @@
     }
   
     private TermDocs termDocs(int i) throws IOException {
-      if (term == null)
-        return null;
       TermDocs result = readerTermDocs[i];
       if (result == null)
         result = readerTermDocs[i] = termDocs(readers[i]);
@@ -612,7 +609,7 @@
   
     protected TermDocs termDocs(IndexReader reader)
       throws IOException {
-      return reader.termDocs();
+      return term==null ? reader.termDocs(null) : reader.termDocs();
     }
   
     public void close() throws IOException {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java Sun Jan 25 14:38:43 2009
@@ -523,7 +523,12 @@
     protected TermDocs termDocs;
 
     public ParallelTermDocs() {}
-    public ParallelTermDocs(Term term) throws IOException { seek(term); }
+    public ParallelTermDocs(Term term) throws IOException {
+      if (term == null)
+        termDocs = readers.isEmpty() ? null : ((IndexReader)readers.get(0)).termDocs(null);
+      else
+        seek(term);
+    }
 
     public int doc() { return termDocs.doc(); }
     public int freq() { return termDocs.freq(); }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java Sun Jan 25 14:38:43 2009
@@ -724,6 +724,14 @@
     return (deletedDocs != null && deletedDocs.get(n));
   }
 
+  public TermDocs termDocs(Term term) throws IOException {
+    if (term == null) {
+      return new AllTermDocs(this);
+    } else {
+      return super.termDocs(term);
+    }
+  }
+
   public TermDocs termDocs() throws IOException {
     ensureOpen();
     return new SegmentTermDocs(this);

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermDocs.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermDocs.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermDocs.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentTermDocs.java Sun Jan 25 14:38:43 2009
@@ -46,7 +46,9 @@
   protected SegmentTermDocs(SegmentReader parent) {
     this.parent = parent;
     this.freqStream = (IndexInput) parent.freqStream.clone();
-    this.deletedDocs = parent.deletedDocs;
+    synchronized (parent) {
+      this.deletedDocs = parent.deletedDocs;
+    }
     this.skipInterval = parent.tis.getSkipInterval();
     this.maxSkipLevels = parent.tis.getMaxSkipLevels();
   }

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/MatchAllDocsQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/MatchAllDocsQuery.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/MatchAllDocsQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/MatchAllDocsQuery.java Sun Jan 25 14:38:43 2009
@@ -18,15 +18,11 @@
  */
 
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.Explanation;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.Searcher;
-import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.Weight;
+import org.apache.lucene.index.TermDocs;
 import org.apache.lucene.util.ToStringUtils;
 
 import java.util.Set;
+import java.io.IOException;
 
 /**
  * A query that matches all documents.
@@ -38,17 +34,13 @@
   }
 
   private class MatchAllScorer extends Scorer {
-
-    final IndexReader reader;
-    int id;
-    final int maxId;
+    final TermDocs termDocs;
     final float score;
 
-    MatchAllScorer(IndexReader reader, Similarity similarity, Weight w) {
+    MatchAllScorer(IndexReader reader, Similarity similarity, Weight w) throws IOException
+    {
       super(similarity);
-      this.reader = reader;
-      id = -1;
-      maxId = reader.maxDoc() - 1;
+      this.termDocs = reader.termDocs(null);
       score = w.getValue();
     }
 
@@ -57,26 +49,19 @@
     }
 
     public int doc() {
-      return id;
+      return termDocs.doc();
     }
 
-    public boolean next() {
-      while (id < maxId) {
-        id++;
-        if (!reader.isDeleted(id)) {
-          return true;
-        }
-      }
-      return false;
+    public boolean next() throws IOException {
+      return termDocs.next();
     }
 
     public float score() {
       return score;
     }
 
-    public boolean skipTo(int target) {
-      id = target - 1;
-      return next();
+    public boolean skipTo(int target) throws IOException {
+      return termDocs.skipTo(target);
     }
 
   }
@@ -112,7 +97,7 @@
       queryWeight *= this.queryNorm;
     }
 
-    public Scorer scorer(IndexReader reader) {
+    public Scorer scorer(IndexReader reader) throws IOException {
       return new MatchAllScorer(reader, similarity, this);
     }
 

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestFilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestFilterIndexReader.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestFilterIndexReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestFilterIndexReader.java Sun Jan 25 14:38:43 2009
@@ -125,6 +125,15 @@
       assertTrue((positions.doc() % 2) == 1);
     }
 
+    int NUM_DOCS = 3;
+
+    TermDocs td = reader.termDocs(null);
+    for(int i=0;i<NUM_DOCS;i++) {
+      assertTrue(td.next());
+      assertEquals(i, td.doc());
+      assertEquals(1, td.freq());
+    }
+    td.close();
     reader.close();
     directory.close();
   }

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestMultiSegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestMultiSegmentReader.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestMultiSegmentReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestMultiSegmentReader.java Sun Jan 25 14:38:43 2009
@@ -149,6 +149,19 @@
     mr.close();
   }
 
+  public void testAllTermDocs() throws IOException {
+    IndexReader reader = openReader();
+    int NUM_DOCS = 2;
+    TermDocs td = reader.termDocs(null);
+    for(int i=0;i<NUM_DOCS;i++) {
+      assertTrue(td.next());
+      assertEquals(i, td.doc());
+      assertEquals(1, td.freq());
+    }
+    td.close();
+    reader.close();
+  }
+
   private void addDoc(RAMDirectory ramDir1, String s, boolean create) throws IOException {
     IndexWriter iw = new IndexWriter(ramDir1, new StandardAnalyzer(), create, IndexWriter.MaxFieldLength.LIMITED);
     Document doc = new Document();

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestParallelReader.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestParallelReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestParallelReader.java Sun Jan 25 14:38:43 2009
@@ -123,7 +123,7 @@
   
   public void testIsCurrent() throws IOException {
     Directory dir1 = getDir1();
-    Directory dir2 = getDir1();
+    Directory dir2 = getDir2();
     ParallelReader pr = new ParallelReader();
     pr.add(IndexReader.open(dir1));
     pr.add(IndexReader.open(dir2));
@@ -147,7 +147,7 @@
 
   public void testIsOptimized() throws IOException {
     Directory dir1 = getDir1();
-    Directory dir2 = getDir1();
+    Directory dir2 = getDir2();
     
     // add another document to ensure that the indexes are not optimized
     IndexWriter modifier = new IndexWriter(dir1, new StandardAnalyzer(), IndexWriter.MaxFieldLength.LIMITED);
@@ -194,6 +194,25 @@
 
   }
 
+  public void testAllTermDocs() throws IOException {
+    Directory dir1 = getDir1();
+    Directory dir2 = getDir2();
+    ParallelReader pr = new ParallelReader();
+    pr.add(IndexReader.open(dir1));
+    pr.add(IndexReader.open(dir2));
+    int NUM_DOCS = 2;
+    TermDocs td = pr.termDocs(null);
+    for(int i=0;i<NUM_DOCS;i++) {
+      assertTrue(td.next());
+      assertEquals(i, td.doc());
+      assertEquals(1, td.freq());
+    }
+    td.close();
+    pr.close();
+    dir1.close();
+    dir2.close();
+  }
+    
   
   private void queryTest(Query query) throws IOException {
     ScoreDoc[] parallelHits = parallel.search(query, null, 1000).scoreDocs;

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java?rev=737513&r1=737512&r2=737513&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java Sun Jan 25 14:38:43 2009
@@ -36,6 +36,7 @@
   public void testQuery() throws IOException {
     RAMDirectory dir = new RAMDirectory();
     IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
+    iw.setMaxBufferedDocs(2);  // force multi-segment
     addDoc("one", iw);
     addDoc("two", iw);
     addDoc("three four", iw);