You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2013/01/17 15:06:15 UTC

svn commit: r1434672 - in /lucene/dev/branches/branch_4x: ./ dev-tools/ lucene/ lucene/analysis/ lucene/analysis/icu/src/java/org/apache/lucene/collation/ lucene/backwards/ lucene/benchmark/ lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codec...

Author: simonw
Date: Thu Jan 17 14:06:13 2013
New Revision: 1434672

URL: http://svn.apache.org/viewvc?rev=1434672&view=rev
Log:
LUCENE-4687: Lazily initialize TermsEnum in BloomFilteringPostingsFormat

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/dev-tools/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/BUILD.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/JRE_VERSION_MIGRATION.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/MIGRATE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/README.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/SYSTEM_REQUIREMENTS.txt   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/   (props changed)
    lucene/dev/branches/branch_4x/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilterFactory.java   (props changed)
    lucene/dev/branches/branch_4x/lucene/backwards/   (props changed)
    lucene/dev/branches/branch_4x/lucene/benchmark/   (props changed)
    lucene/dev/branches/branch_4x/lucene/build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/codecs/   (props changed)
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
    lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
    lucene/dev/branches/branch_4x/lucene/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/Terms.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip   (props changed)
    lucene/dev/branches/branch_4x/lucene/demo/   (props changed)
    lucene/dev/branches/branch_4x/lucene/facet/   (props changed)
    lucene/dev/branches/branch_4x/lucene/grouping/   (props changed)
    lucene/dev/branches/branch_4x/lucene/highlighter/   (props changed)
    lucene/dev/branches/branch_4x/lucene/ivy-settings.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/join/   (props changed)
    lucene/dev/branches/branch_4x/lucene/licenses/   (props changed)
    lucene/dev/branches/branch_4x/lucene/memory/   (props changed)
    lucene/dev/branches/branch_4x/lucene/misc/   (props changed)
    lucene/dev/branches/branch_4x/lucene/module-build.xml   (props changed)
    lucene/dev/branches/branch_4x/lucene/queries/   (props changed)
    lucene/dev/branches/branch_4x/lucene/queryparser/   (props changed)
    lucene/dev/branches/branch_4x/lucene/sandbox/   (props changed)
    lucene/dev/branches/branch_4x/lucene/site/   (props changed)
    lucene/dev/branches/branch_4x/lucene/spatial/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/lucene/tools/   (props changed)
    lucene/dev/branches/branch_4x/solr/   (props changed)
    lucene/dev/branches/branch_4x/solr/CHANGES.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/LICENSE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/README.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/SYSTEM_REQUIREMENTS.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/cloud-dev/   (props changed)
    lucene/dev/branches/branch_4x/solr/common-build.xml   (props changed)
    lucene/dev/branches/branch_4x/solr/contrib/   (props changed)
    lucene/dev/branches/branch_4x/solr/core/   (props changed)
    lucene/dev/branches/branch_4x/solr/example/   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpclient-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpclient-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpcore-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpcore-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpmime-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/licenses/httpmime-NOTICE.txt   (props changed)
    lucene/dev/branches/branch_4x/solr/scripts/   (props changed)
    lucene/dev/branches/branch_4x/solr/site/   (props changed)
    lucene/dev/branches/branch_4x/solr/solrj/   (props changed)
    lucene/dev/branches/branch_4x/solr/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/solr/testlogging.properties   (props changed)
    lucene/dev/branches/branch_4x/solr/webapp/   (props changed)

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1434672&r1=1434671&r2=1434672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Thu Jan 17 14:06:13 2013
@@ -14,6 +14,11 @@ Changes in backwards compatibility polic
   support in-memory caching, CategoryListCache was removed too.
   (Shai Erera, Michael McCandless)
 
+Optimizations
+
+* LUCENE-4687: BloomFilteringPostingsFormat now lazily initializes its delegate
+  TermsEnum, only when it is needed to do a seek or get a DocsEnum. (Simon Willnauer)
+
 New Features
 
 * LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the 

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java?rev=1434672&r1=1434671&r2=1434672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java Thu Jan 17 14:06:13 2013
@@ -240,26 +240,22 @@ public final class BloomFilteringPosting
       
       @Override
       public TermsEnum iterator(TermsEnum reuse) throws IOException {
-        TermsEnum result;
         if ((reuse != null) && (reuse instanceof BloomFilteredTermsEnum)) {
           // recycle the existing BloomFilteredTermsEnum by asking the delegate
           // to recycle its contained TermsEnum
           BloomFilteredTermsEnum bfte = (BloomFilteredTermsEnum) reuse;
           if (bfte.filter == filter) {
-            bfte.delegateTermsEnum = delegateTerms
-                .iterator(bfte.delegateTermsEnum);
+            bfte.reset(delegateTerms, bfte.delegateTermsEnum);
             return bfte;
           }
         }
         // We have been handed something we cannot reuse (either null, wrong
         // class or wrong filter) so allocate a new object
-        result = new BloomFilteredTermsEnum(delegateTerms.iterator(reuse),
-            filter);
-        return result;
+        return new BloomFilteredTermsEnum(delegateTerms, reuse, filter);
       }
       
       @Override
-      public Comparator<BytesRef> getComparator() throws IOException {
+      public Comparator<BytesRef> getComparator() {
         return delegateTerms.getComparator();
       }
       
@@ -299,24 +295,43 @@ public final class BloomFilteringPosting
       }
     }
     
-    class BloomFilteredTermsEnum extends TermsEnum {
+    final class BloomFilteredTermsEnum extends TermsEnum {
+      private Terms delegateTerms;
+      private TermsEnum delegateTermsEnum;
+      private TermsEnum reuseDelegate;
+      private final FuzzySet filter;
+      
+      public BloomFilteredTermsEnum(Terms delegateTerms, TermsEnum reuseDelegate, FuzzySet filter) throws IOException {
+        this.delegateTerms = delegateTerms;
+        this.reuseDelegate = reuseDelegate;
+        this.filter = filter;
+      }
       
-      TermsEnum delegateTermsEnum;
-      private FuzzySet filter;
+      void reset(Terms delegateTerms, TermsEnum reuseDelegate) throws IOException {
+        this.delegateTerms = delegateTerms;
+        this.reuseDelegate = reuseDelegate;
+        this.delegateTermsEnum = null;
+      }
       
-      public BloomFilteredTermsEnum(TermsEnum iterator, FuzzySet filter) {
-        this.delegateTermsEnum = iterator;
-        this.filter = filter;
+      private final TermsEnum delegate() throws IOException {
+        if (delegateTermsEnum == null) {
+          /* pull the iterator only if we really need it -
+           * this can be a relatively heavy operation depending on the
+           * delegate postings format and the underlying directory
+           * (clone IndexInput) */
+          delegateTermsEnum = delegateTerms.iterator(reuseDelegate);
+        }
+        return delegateTermsEnum;
       }
       
       @Override
       public final BytesRef next() throws IOException {
-        return delegateTermsEnum.next();
+        return delegate().next();
       }
       
       @Override
       public final Comparator<BytesRef> getComparator() {
-        return delegateTermsEnum.getComparator();
+        return delegateTerms.getComparator();
       }
       
       @Override
@@ -330,51 +345,51 @@ public final class BloomFilteringPosting
         if (filter.contains(text) == ContainsResult.NO) {
           return false;
         }
-        return delegateTermsEnum.seekExact(text, useCache);
+        return delegate().seekExact(text, useCache);
       }
       
       @Override
       public final SeekStatus seekCeil(BytesRef text, boolean useCache)
           throws IOException {
-        return delegateTermsEnum.seekCeil(text, useCache);
+        return delegate().seekCeil(text, useCache);
       }
       
       @Override
       public final void seekExact(long ord) throws IOException {
-        delegateTermsEnum.seekExact(ord);
+        delegate().seekExact(ord);
       }
       
       @Override
       public final BytesRef term() throws IOException {
-        return delegateTermsEnum.term();
+        return delegate().term();
       }
       
       @Override
       public final long ord() throws IOException {
-        return delegateTermsEnum.ord();
+        return delegate().ord();
       }
       
       @Override
       public final int docFreq() throws IOException {
-        return delegateTermsEnum.docFreq();
+        return delegate().docFreq();
       }
       
       @Override
       public final long totalTermFreq() throws IOException {
-        return delegateTermsEnum.totalTermFreq();
+        return delegate().totalTermFreq();
       }
       
 
       @Override
       public DocsAndPositionsEnum docsAndPositions(Bits liveDocs,
           DocsAndPositionsEnum reuse, int flags) throws IOException {
-        return delegateTermsEnum.docsAndPositions(liveDocs, reuse, flags);
+        return delegate().docsAndPositions(liveDocs, reuse, flags);
       }
 
       @Override
       public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags)
           throws IOException {
-        return delegateTermsEnum.docs(liveDocs, reuse, flags);
+        return delegate().docs(liveDocs, reuse, flags);
       }
       
       
@@ -387,12 +402,10 @@ public final class BloomFilteringPosting
     private Map<FieldInfo,FuzzySet> bloomFilters = new HashMap<FieldInfo,FuzzySet>();
     private SegmentWriteState state;
     
-    // private PostingsFormat delegatePostingsFormat;
     
     public BloomFilteredFieldsConsumer(FieldsConsumer fieldsConsumer,
         SegmentWriteState state, PostingsFormat delegatePostingsFormat) {
       this.delegateFieldsConsumer = fieldsConsumer;
-      // this.delegatePostingsFormat=delegatePostingsFormat;
       this.state = state;
     }
     

Modified: lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1434672&r1=1434671&r2=1434672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Thu Jan 17 14:06:13 2013
@@ -272,7 +272,7 @@ public class SimpleTextTermVectorsReader
     }
 
     @Override
-    public Comparator<BytesRef> getComparator() throws IOException {
+    public Comparator<BytesRef> getComparator() {
       return BytesRef.getUTF8SortedAsUnicodeComparator();
     }
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1434672&r1=1434671&r2=1434672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Thu Jan 17 14:06:13 2013
@@ -92,7 +92,7 @@ public class FilterAtomicReader extends 
     }
 
     @Override
-    public Comparator<BytesRef> getComparator() throws IOException {
+    public Comparator<BytesRef> getComparator() {
       return in.getComparator();
     }
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/Terms.java?rev=1434672&r1=1434671&r2=1434672&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/Terms.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/Terms.java Thu Jan 17 14:06:13 2013
@@ -80,7 +80,7 @@ public abstract class Terms {
    *  if there are no terms.  This method may be invoked
    *  many times; it's best to cache a single instance &
    *  reuse it. */
-  public abstract Comparator<BytesRef> getComparator() throws IOException;
+  public abstract Comparator<BytesRef> getComparator();
 
   /** Returns the number of terms for this field, or -1 if this 
    *  measure isn't stored by the codec. Note that, just like