You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2015/09/12 20:51:37 UTC
svn commit: r1702661 - in /lucene/dev/trunk: lucene/core/src/java/org/apache/lucene/util/ solr/ solr/core/src/java/org/apache/solr/query/ solr/core/src/java/org/apache/solr/schema/ solr/core/src/java/org/apache/solr/search/ solr/core/src/test/org/apach...

Author: yonik
Date: Sat Sep 12 18:51:36 2015
New Revision: 1702661

URL: http://svn.apache.org/r1702661
Log:
SOLR-8037: speed up term range queries, use filter cache for embedded ranges

Added:
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java   (with props)
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/DocSetBuilder.java   (with props)
Modified:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldType.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TextField.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/BitDocSet.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFiltering.java

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java?rev=1702661&r1=1702660&r2=1702661&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java Sat Sep 12 18:51:36 2015
@@ -21,8 +21,9 @@ import java.util.Arrays;
 
 /**
  * A LSB Radix sorter for unsigned int values.
+ * @lucene.internal
  */
-final class LSBRadixSorter {
+public final class LSBRadixSorter {
 
   private static final int INSERTION_SORT_THRESHOLD = 30;
   private static final int HISTOGRAM_SIZE = 256;

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1702661&r1=1702660&r2=1702661&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sat Sep 12 18:51:36 2015
@@ -72,6 +72,11 @@ Optimizations
 * SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been
   specified.  Speedups of up to 8% were observed.  (yonik)
 
+* SOLR-8037: Speed up creation of filters from term range queries (i.e. non-numeric range queries)
+  and use the filter cache for term range queries that are part of larger queries.  Some observed
+  speedups were up to 2.5x for production of filters, and up to 10x for query evaluation with
+  embedded term range queres that resulted in filter cache hits.  (yonik)
+
 
 Other Changes
 ----------------------

Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java?rev=1702661&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java Sat Sep 12 18:51:36 2015
@@ -0,0 +1,502 @@
+package org.apache.solr.query;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BulkScorer;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.solr.search.BitDocSet;
+import org.apache.solr.search.DocSet;
+import org.apache.solr.search.DocSetBuilder;
+import org.apache.solr.search.DocSetProducer;
+import org.apache.solr.search.ExtendedQueryBase;
+import org.apache.solr.search.SolrConstantScoreQuery;
+import org.apache.solr.search.SolrIndexSearcher;
+
+/** @lucene.experimental */
+public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetProducer {
+  private final String field;
+  private final BytesRef lower;
+  private final BytesRef upper;
+  private byte flags;
+  private static byte FLAG_INC_LOWER = 0x01;
+  private static byte FLAG_INC_UPPER = 0x02;
+
+  public SolrRangeQuery(String field, BytesRef lower, BytesRef upper, boolean includeLower, boolean includeUpper) {
+    this.field = field;
+    this.lower = lower;
+    this.upper = upper;
+    this.flags = (byte)((this.lower != null && includeLower ? FLAG_INC_LOWER : 0) | (this.upper != null && includeUpper ? FLAG_INC_UPPER : 0));
+  }
+
+  public String getField() {
+    return field;
+  }
+
+  public boolean includeLower() {
+    return (flags & FLAG_INC_LOWER) != 0;
+  }
+
+  public boolean includeUpper() {
+    return (flags & FLAG_INC_UPPER) != 0;
+  }
+
+  @Override
+  public int hashCode() {
+    int hash = 0x8f2c9ba7 * (flags+1);  // avoid multiplying by 0
+    hash = hash * 29 + ((lower == null) ? 0 : lower.hashCode());  // TODO: simpler hash code here?
+    hash = hash * 29 + ((upper == null) ? 0 : upper.hashCode());
+    return hash;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (!(obj instanceof SolrRangeQuery)) {
+      return false;
+    }
+    SolrRangeQuery other = (SolrRangeQuery)obj;
+
+    return (this.flags == other.flags)
+        && (this.field.equals(other.field))
+        && (this.lower == other.lower || (this.lower != null && other.lower != null && this.lower.equals(other.lower)))
+        && (this.upper == other.upper || (this.upper != null && other.upper != null && this.upper.equals(other.upper)))
+        ;
+  }
+
+  @Override
+  public String toString(String field) {
+    StringBuilder buffer = new StringBuilder();
+    if (!getField().equals(field)) {
+      buffer.append(getField());
+      buffer.append(":");
+    }
+    // TODO: use our schema?
+    buffer.append(includeLower() ? '[' : '{');
+    buffer.append(endpoint(lower));
+    buffer.append(" TO ");
+    buffer.append(endpoint(upper));
+    buffer.append(includeUpper() ? ']' : '}');
+    return buffer.toString();
+  }
+
+  private String endpoint(BytesRef ref) {
+    if (ref == null) return "*";
+    String toStr = Term.toString(ref);
+    if ("*".equals(toStr)) {
+      toStr = "\\*";
+    }
+    // TODO: other escaping
+    return toStr;
+  }
+
+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    return this;
+  }
+
+  @Override
+  public Weight createWeight(IndexSearcher searcher, boolean needScores) throws IOException {
+    return new ConstWeight(searcher, needScores);
+    /*
+    DocSet docs = createDocSet(searcher.getIndexReader().leaves(), searcher.getIndexReader().maxDoc());
+    SolrConstantScoreQuery csq = new SolrConstantScoreQuery( docs.getTopFilter() );
+    return csq.createWeight(searcher, needScores);
+    */
+  }
+
+  @Override
+  public DocSet createDocSet(SolrIndexSearcher searcher) throws IOException {
+    return createDocSet( searcher, Math.min(64,(searcher.maxDoc()>>>10)+4) );
+  }
+
+  private DocSet createDocSet(SolrIndexSearcher searcher, long cost) throws IOException {
+    int maxDoc = searcher.maxDoc();
+    BitDocSet liveDocs = searcher.getLiveDocs();
+    FixedBitSet liveBits = liveDocs.size() == maxDoc ? null : liveDocs.getBits();
+
+    DocSetBuilder builder = new DocSetBuilder(maxDoc, cost);
+
+    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
+
+    int maxTermsPerSegment = 0;
+    for (LeafReaderContext ctx : leaves) {
+      TermsEnum te = getTermsEnum(ctx);
+      int termsVisited = builder.add(te, ctx.docBase);
+      maxTermsPerSegment = Math.max(maxTermsPerSegment, termsVisited);
+    }
+
+    return maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
+  }
+
+
+  private class RangeTermsEnum extends TermsEnum {
+
+    TermsEnum te;
+    BytesRef curr;
+    boolean positioned;
+
+    public RangeTermsEnum(Terms terms) throws IOException {
+      if (terms == null) {
+        positioned = true;
+      } else {
+        te = terms.iterator();
+        if (lower != null) {
+          TermsEnum.SeekStatus status = te.seekCeil(lower);
+          if (status == TermsEnum.SeekStatus.END) {
+            positioned = true;
+            curr = null;
+          } else if (status == SeekStatus.FOUND) {
+            positioned = includeLower();
+            curr = te.term();
+          } else {
+            // lower bound not found, so includeLower is irrelevant
+            positioned = true;
+            curr = te.term();
+          }
+        }
+      }
+    }
+
+    @Override
+    public SeekStatus seekCeil(BytesRef text) throws IOException {
+      return te.seekCeil(text);
+    }
+
+    @Override
+    public void seekExact(long ord) throws IOException {
+      te.seekExact(ord);
+    }
+
+    @Override
+    public BytesRef term() throws IOException {
+      return te.term(); // should be equal to curr, except if we went past the end
+    }
+
+    @Override
+    public long ord() throws IOException {
+      return te.ord();
+    }
+
+    @Override
+    public int docFreq() throws IOException {
+      return te.docFreq();
+    }
+
+    @Override
+    public long totalTermFreq() throws IOException {
+      return te.totalTermFreq();
+    }
+
+    @Override
+    public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
+      return te.postings(reuse, flags);
+    }
+
+    @Override
+    public BytesRef next() throws IOException {
+      if (positioned) {
+        positioned = false;
+      } else {
+        curr = te.next();
+      }
+
+      if (curr == null) return null;
+
+      if (upper != null) {
+        int cmp = curr.compareTo(upper);
+        if (cmp < 0 || cmp == 0 && includeUpper()) {
+          return curr;
+        } else {
+          curr = null;
+        }
+      }
+      return curr;
+    }
+
+    @Override
+    public AttributeSource attributes() {
+      return te.attributes();
+    }
+
+    @Override
+    public boolean seekExact(BytesRef text) throws IOException {
+      return te.seekExact(text);
+    }
+
+    @Override
+    public void seekExact(BytesRef term, TermState state) throws IOException {
+      te.seekExact(term, state);
+    }
+
+    @Override
+    public TermState termState() throws IOException {
+      return te.termState();
+    }
+  }
+
+
+  public TermsEnum getTermsEnum(LeafReaderContext ctx) throws IOException {
+    return new RangeTermsEnum( ctx.reader().terms(getField()) );
+  }
+
+
+  private static class TermAndState {
+    final BytesRef term;
+    final TermState state;
+    final int docFreq;
+    final long totalTermFreq;
+
+    TermAndState(BytesRef term, TermState state, int docFreq, long totalTermFreq) {
+      this.term = term;
+      this.state = state;
+      this.docFreq = docFreq;
+      this.totalTermFreq = totalTermFreq;
+    }
+  }
+
+  private static class SegState {
+    final Weight weight;
+    final DocIdSet set;
+
+    SegState(Weight weight) {
+      this.weight = weight;
+      this.set = null;
+    }
+
+    SegState(DocIdSet set) {
+      this.set = set;
+      this.weight = null;
+    }
+  }
+
+  // adapted from MultiTermQueryConstantScoreWrapper
+  class ConstWeight extends ConstantScoreWeight {
+
+    private static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
+
+    final IndexSearcher searcher;
+    final boolean needScores;
+    boolean checkedFilterCache;
+    Filter filter;
+    final SegState[] segStates;
+
+
+    protected ConstWeight(IndexSearcher searcher, boolean needScores) {
+      super( SolrRangeQuery.this );
+      this.searcher = searcher;
+      this.segStates = new SegState[ searcher.getIndexReader().leaves().size() ];
+      this.needScores = needScores;
+    }
+
+
+    /** Try to collect terms from the given terms enum and return count=sum(df) for terms visited so far
+     *  or (-count - 1) if this should be rewritten into a boolean query.
+     *  The termEnum will already be positioned on the next term if not exhausted.
+     */
+    private long collectTerms(LeafReaderContext context, TermsEnum termsEnum, List<TermAndState> terms) throws IOException {
+      long count = 0;
+      final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
+      for (int i = 0; i < threshold; ++i) {
+        final BytesRef term = termsEnum.next();
+        if (term == null) {
+          return -count - 1;
+        }
+        TermState state = termsEnum.termState();
+        if (state.isRealTerm() == false) {
+          // TermQuery does not accept fake terms for now
+          return count;
+        }
+        int df = termsEnum.docFreq();
+        count += df;
+        terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, df, termsEnum.totalTermFreq()));
+      }
+      return termsEnum.next() == null ? (-count - 1) : count;
+    }
+
+    private SegState getSegState(LeafReaderContext context) throws IOException {
+      SegState segState = segStates[context.ord];
+      if (segState != null) return segState;
+
+      // first time, check our filter cache
+      boolean doCheck = !checkedFilterCache && context.ord == 0;
+      checkedFilterCache = true;
+      SolrIndexSearcher solrSearcher = null;
+      if (doCheck && searcher instanceof SolrIndexSearcher) {
+        solrSearcher = (SolrIndexSearcher)searcher;
+        if (solrSearcher.getFilterCache() == null) {
+          doCheck = false;
+        } else {
+          solrSearcher = (SolrIndexSearcher)searcher;
+          DocSet answer = solrSearcher.getFilterCache().get(SolrRangeQuery.this);
+          if (answer != null) {
+            filter = answer.getTopFilter();
+          }
+        }
+      }
+
+      if (filter != null) {
+        return segStates[context.ord] = new SegState(filter.getDocIdSet(context, null));
+      }
+
+
+      final Terms terms = context.reader().terms(SolrRangeQuery.this.getField());
+      if (terms == null) {
+        return segStates[context.ord] = new SegState((DocIdSet) null);
+      }
+
+      final TermsEnum termsEnum = SolrRangeQuery.this.getTermsEnum(context);
+
+      PostingsEnum docs = null;
+
+      final List<TermAndState> collectedTerms = new ArrayList<>();
+      long count = collectTerms(context, termsEnum, collectedTerms);
+      if (count < 0) {
+        BooleanQuery.Builder bq = new BooleanQuery.Builder();
+        for (TermAndState t : collectedTerms) {
+          final TermContext termContext = new TermContext(searcher.getTopReaderContext());
+          termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
+          bq.add(new TermQuery(new Term( SolrRangeQuery.this.getField(), t.term), termContext), BooleanClause.Occur.SHOULD);
+        }
+        Query q = new ConstantScoreQuery(bq.build());
+        final Weight weight = searcher.rewrite(q).createWeight(searcher, needScores);
+        weight.normalize(1f, score());
+        return segStates[context.ord] = new SegState(weight);
+      }
+
+      // Too many terms for boolean query...
+
+      if (doCheck) {
+        DocSet answer = createDocSet(solrSearcher, count);
+        solrSearcher.getFilterCache().put(SolrRangeQuery.this, answer);
+        filter = answer.getTopFilter();
+        return segStates[context.ord] = new SegState(filter.getDocIdSet(context, null));
+      }
+
+      /* FUTURE: reuse term states in the future to help build DocSet, use collected count so far...
+      Bits liveDocs = context.reader().getLiveDocs();
+      int base = context.docBase;
+      int termsVisited = collectedTerms.size();
+
+      DocSetBuilder builder = new DocSetBuilder(searcher.getIndexReader().maxDoc());
+      if (!collectedTerms.isEmpty()) {
+        TermsEnum termsEnum2 = terms.iterator();
+        for (TermAndState t : collectedTerms) {
+          termsEnum2.seekExact(t.term, t.state);
+          docs = termsEnum2.postings(docs, PostingsEnum.NONE);
+          builder.add(docs, context.docBase, liveDocs);
+        }
+      }
+
+      termsVisited += builder.add(termsEnum, base, liveDocs);
+     */
+
+      DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc());
+      builder.grow((int)Math.min(Integer.MAX_VALUE,count));
+      if (collectedTerms.isEmpty() == false) {
+        TermsEnum termsEnum2 = terms.iterator();
+        for (TermAndState t : collectedTerms) {
+          termsEnum2.seekExact(t.term, t.state);
+          docs = termsEnum2.postings(docs, PostingsEnum.NONE);
+          builder.add(docs);
+        }
+      }
+
+      do {
+        // already positioned on the next term, so don't call next() here...
+        docs = termsEnum.postings(docs, PostingsEnum.NONE);
+        builder.add(docs);
+      } while (termsEnum.next() != null);
+
+      DocIdSet segSet = builder.build();
+      return segStates[context.ord] = new SegState(segSet);
+    }
+
+    private Scorer scorer(DocIdSet set) throws IOException {
+      if (set == null) {
+        return null;
+      }
+      final DocIdSetIterator disi = set.iterator();
+      if (disi == null) {
+        return null;
+      }
+      return new ConstantScoreScorer(this, score(), disi);
+    }
+
+    @Override
+    public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
+      final SegState weightOrBitSet = getSegState(context);
+      if (weightOrBitSet.weight != null) {
+        return weightOrBitSet.weight.bulkScorer(context);
+      } else {
+        final Scorer scorer = scorer(weightOrBitSet.set);
+        if (scorer == null) {
+          return null;
+        }
+        return new DefaultBulkScorer(scorer);
+      }
+    }
+
+    @Override
+    public Scorer scorer(LeafReaderContext context) throws IOException {
+      final SegState weightOrBitSet = getSegState(context);
+      if (weightOrBitSet.weight != null) {
+        return weightOrBitSet.weight.scorer(context);
+      } else {
+        return scorer(weightOrBitSet.set);
+      }
+    }
+  }
+}
+
+
+
+
+
+
+

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldType.java?rev=1702661&r1=1702660&r2=1702661&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldType.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/FieldType.java Sat Sep 12 18:51:36 2015
@@ -63,6 +63,7 @@ import org.apache.solr.common.SolrExcept
 import org.apache.solr.common.util.Base64;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.query.SolrRangeQuery;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.Sorting;
@@ -720,12 +721,11 @@ public abstract class FieldType extends
           part2 == null ? null : new BytesRef(toInternal(part2)),
           minInclusive, maxInclusive);
     } else {
-      MultiTermQuery rangeQuery = TermRangeQuery.newStringRange(
+      SolrRangeQuery rangeQuery = new SolrRangeQuery(
             field.getName(),
-            part1 == null ? null : toInternal(part1),
-            part2 == null ? null : toInternal(part2),
+            part1 == null ? null : new BytesRef(toInternal(part1)),
+            part2 == null ? null : new BytesRef(toInternal(part2)),
             minInclusive, maxInclusive);
-      rangeQuery.setRewriteMethod(getRewriteMethod(parser, field));
       return rangeQuery;
     }
   }

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TextField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TextField.java?rev=1702661&r1=1702660&r2=1702661&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TextField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/schema/TextField.java Sat Sep 12 18:51:36 2015
@@ -28,6 +28,7 @@ import org.apache.lucene.uninverting.Uni
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.QueryBuilder;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.query.SolrRangeQuery;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.Sorting;
@@ -136,7 +137,7 @@ public class TextField extends FieldType
     Analyzer multiAnalyzer = getMultiTermAnalyzer();
     BytesRef lower = analyzeMultiTerm(field.getName(), part1, multiAnalyzer);
     BytesRef upper = analyzeMultiTerm(field.getName(), part2, multiAnalyzer);
-    return new TermRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
+    return new SolrRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
   }
 
   public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/BitDocSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/BitDocSet.java?rev=1702661&r1=1702660&r2=1702661&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/BitDocSet.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/BitDocSet.java Sat Sep 12 18:51:36 2015
@@ -285,8 +285,7 @@ public class BitDocSet extends DocSetBas
         }
 
         final int base = context.docBase;
-        final int maxDoc = reader.maxDoc();
-        final int max = base + maxDoc;   // one past the max doc in this segment.
+        final int max = base + reader.maxDoc();   // one past the max doc in this segment.
 
         return BitsFilteredDocIdSet.wrap(new DocIdSet() {
           @Override
@@ -302,10 +301,11 @@ public class BitDocSet extends DocSetBas
 
               @Override
               public int nextDoc() {
-                if (pos >= bs.length() - 1) {
+                int next = pos+1;
+                if (next >= max) {
                   return adjustedDoc = NO_MORE_DOCS;
                 } else {
-                  pos = bs.nextSetBit(pos + 1);
+                  pos = bs.nextSetBit(next);
                   return adjustedDoc = pos < max ? pos - base : NO_MORE_DOCS;
                 }
               }
@@ -314,7 +314,7 @@ public class BitDocSet extends DocSetBas
               public int advance(int target) {
                 if (target == NO_MORE_DOCS) return adjustedDoc = NO_MORE_DOCS;
                 int adjusted = target + base;
-                if (adjusted >= bs.length()) {
+                if (adjusted >= max) {
                   return adjustedDoc = NO_MORE_DOCS;
                 } else {
                   pos = bs.nextSetBit(adjusted);
@@ -326,6 +326,7 @@ public class BitDocSet extends DocSetBas
               public long cost() {
                 // we don't want to actually compute cardinality, but
                 // if it's already been computed, we use it (pro-rated for the segment)
+                int maxDoc = max-base;
                 if (size != -1) {
                   return (long)(size * ((FixedBitSet.bits2words(maxDoc)<<6) / (float)bs.length()));
                 } else {
@@ -350,7 +351,7 @@ public class BitDocSet extends DocSetBas
 
               @Override
               public int length() {
-                return maxDoc;
+                return max-base;
               }
             };
           }

Added: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/DocSetBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/DocSetBuilder.java?rev=1702661&view=auto
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/DocSetBuilder.java (added)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/DocSetBuilder.java Sat Sep 12 18:51:36 2015
@@ -0,0 +1,222 @@
+package org.apache.solr.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.LSBRadixSorter;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Adapted from DocIdSetBuilder to build DocSets
+ *
+ * @lucene.internal
+ */
+public final class DocSetBuilder {
+
+  private final int maxDoc;
+  private final int threshold;
+
+  private int[] buffer;
+  private int pos;
+
+  private FixedBitSet bitSet;
+
+
+  public DocSetBuilder(int maxDoc, long costEst) {
+    this.maxDoc = maxDoc;
+    // For ridiculously small sets, we'll just use a sorted int[]
+    // maxDoc >>> 7 is a good value if you want to save memory, lower values
+    // such as maxDoc >>> 11 should provide faster building but at the expense
+    // of using a full bitset even for quite sparse data
+    this.threshold = (maxDoc >>> 7) + 4; // the +4 is for better testing on small indexes
+
+    if (costEst > threshold) {
+      bitSet = new FixedBitSet(maxDoc);
+    } else {
+      this.buffer = new int[Math.max((int)costEst,1)];
+    }
+  }
+
+  private void upgradeToBitSet() {
+    assert bitSet == null;
+    bitSet = new FixedBitSet(maxDoc);
+    for (int i = 0; i < pos; ++i) {
+      bitSet.set(buffer[i]);
+    }
+    this.buffer = null;
+    this.pos = 0;
+  }
+
+  private void growBuffer(int minSize) {
+    if (minSize < buffer.length) return;
+
+    int newSize = buffer.length;
+    while (newSize < minSize) {
+      newSize = newSize << 1;
+    }
+    newSize = Math.min(newSize, threshold);
+
+    int[] newBuffer = new int[newSize];
+    System.arraycopy(buffer, 0, newBuffer, 0, pos);
+    buffer = newBuffer;
+  }
+
+  public void add(DocIdSetIterator iter, int base) throws IOException {
+    grow((int) Math.min(Integer.MAX_VALUE, iter.cost()));
+
+    if (bitSet != null) {
+      add(bitSet, iter, base);
+    } else {
+      while (true) {
+        for (int i = pos; i < buffer.length; ++i) {
+          final int doc = iter.nextDoc();
+          if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+            pos = i; // update pos
+            return;
+          }
+          buffer[i] = doc + base;  // using the loop counter may help with removal of bounds checking
+        }
+
+        pos = buffer.length; // update pos
+        if (pos + 1 >= threshold) {
+          break;
+        }
+
+        growBuffer(pos + 1);
+      }
+
+      upgradeToBitSet();
+      add(bitSet, iter, base);
+    }
+  }
+
+
+  public static void add(FixedBitSet bitSet, DocIdSetIterator iter, int base) throws IOException {
+    for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
+      bitSet.set(doc + base);
+    }
+  }
+
+  /** Returns the number of terms visited */
+  public int add(TermsEnum te, int base) throws IOException {
+    PostingsEnum postings = null;
+
+    int termCount = 0;
+    for(;;) {
+      BytesRef term = te.next();
+      if (term == null) break;
+      termCount++;
+      postings = te.postings(postings, PostingsEnum.NONE);
+      add(postings, base);
+    }
+
+    return termCount;
+  }
+
+
+  public void grow(int numDocs) {
+    if (bitSet == null) {
+      final long newLength = pos + numDocs;
+      if (newLength < threshold) {
+        growBuffer((int) newLength);
+      } else {
+        upgradeToBitSet();
+      }
+    }
+  }
+
+
+  public void add(int doc) {
+    if (bitSet != null) {
+      bitSet.set(doc);
+    } else {
+      if (pos >= buffer.length) {
+        if (pos + 1 >= threshold) {
+          upgradeToBitSet();
+          bitSet.set(doc);
+          return;
+        }
+        growBuffer(pos + 1);
+      }
+      buffer[pos++] = doc;
+    }
+  }
+
+  private static int dedup(int[] arr, int length, FixedBitSet acceptDocs) {
+    if (length == 0) {
+      return 0;
+    }
+    int l = 1;
+    int previous = arr[0];
+    for (int i = 1; i < length; ++i) {
+      final int value = arr[i];
+      assert value >= previous;
+      if (value != previous) {
+        if (acceptDocs == null || acceptDocs.get(value)) {
+          arr[l++] = value;
+          previous = value;
+        }
+      }
+    }
+    return l;
+  }
+
+
+
+  public DocSet build(FixedBitSet filter) {
+    if (bitSet != null) {
+      if (filter != null) {
+        bitSet.and(filter);
+      }
+      return new BitDocSet(bitSet);
+      // TODO - if this set will be cached, should we make it smaller if it's below DocSetUtil.smallSetSize?
+    } else {
+      LSBRadixSorter sorter = new LSBRadixSorter();
+      sorter.sort(buffer, 0, pos);
+      final int l = dedup(buffer, pos, filter);
+      assert l <= pos;
+      return new SortedIntDocSet(buffer, l);  // TODO: have option to not shrink in the future if it will be a temporary set
+    }
+  }
+
+  /** Only use this if you know there were no duplicates and that docs were collected in-order! */
+  public DocSet buildUniqueInOrder(FixedBitSet filter) {
+    if (bitSet != null) {
+      if (filter != null) {
+        bitSet.and(filter);
+      }
+      return new BitDocSet(bitSet);
+    } else {
+      // don't need to sort, but still need to remove non accepted docs
+      int l = pos;
+      if (filter != null) {
+        l = dedup(buffer, pos, filter);
+      }
+      return new SortedIntDocSet(buffer, l);  // TODO: have option to not shrink in the future if it will be a temporary set
+    }
+  }
+
+}
+

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=1702661&r1=1702660&r2=1702661&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java Sat Sep 12 18:51:36 2015
@@ -462,6 +462,10 @@ public class SolrIndexSearcher extends I
     return fieldNames;
   }
 
+  public SolrCache<Query,DocSet> getFilterCache() {
+    return filterCache;
+  }
+
   /**
    * Returns a collection of the names of all stored fields which can be
    * highlighted the index reader knows about.
@@ -919,12 +923,12 @@ public class SolrIndexSearcher extends I
       DocSet absAnswer = filterCache.get(absQ);
       if (absAnswer!=null) {
         if (positive) return absAnswer;
-        else return getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
+        else return getLiveDocs().andNot(absAnswer);
       }
     }
 
     DocSet absAnswer = getDocSetNC(absQ, null);
-    DocSet answer = positive ? absAnswer : getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
+    DocSet answer = positive ? absAnswer : getLiveDocs().andNot(absAnswer);
 
     if (filterCache != null) {
       // cache negative queries as positive
@@ -948,7 +952,15 @@ public class SolrIndexSearcher extends I
   }
 
   private static Query matchAllDocsQuery = new MatchAllDocsQuery();
+  private BitDocSet liveDocs;
 
+  public BitDocSet getLiveDocs() throws IOException {
+    // going through the filter cache will provide thread safety here
+    if (liveDocs == null) {
+       liveDocs = getDocSetBits(matchAllDocsQuery);
+    }
+    return liveDocs;
+  }
 
   public static class ProcessedFilter {
     public DocSet answer;  // the answer, if non-null
@@ -1129,7 +1141,7 @@ public class SolrIndexSearcher extends I
 
     // Are all of our normal cached filters negative?
     if (end > 0 && answer==null) {
-      answer = getPositiveDocSet(matchAllDocsQuery);
+      answer = getLiveDocs();
     }
 
     // do negative queries first to shrink set size
@@ -1152,7 +1164,7 @@ public class SolrIndexSearcher extends I
     } else {
       if (postFilters == null) {
         if (answer == null) {
-          answer = getPositiveDocSet(matchAllDocsQuery);
+          answer = getLiveDocs();
         }
         // "answer" is the only part of the filter, so set it.
         pf.answer = answer;
@@ -2150,7 +2162,7 @@ public class SolrIndexSearcher extends I
 
     // if both negative, we need to create a temp DocSet since we
     // don't have a counting method that takes three.
-    DocSet all = getPositiveDocSet(matchAllDocsQuery);
+    DocSet all = getLiveDocs();
 
     // -a -b == *:*.andNot(a).andNotSize(b) == *.*.andNotSize(a.union(b))
     // we use the last form since the intermediate DocSet should normally be smaller.

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFiltering.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFiltering.java?rev=1702661&r1=1702660&r2=1702661&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFiltering.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/TestFiltering.java Sat Sep 12 18:51:36 2015
@@ -21,6 +21,7 @@ package org.apache.solr.search;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.request.SolrQueryRequest;
@@ -34,7 +35,7 @@ public class TestFiltering extends SolrT
   @BeforeClass
   public static void beforeTests() throws Exception {
     System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
-    initCore("solrconfig.xml","schema12.xml");
+    initCore("solrconfig.xml","schema_latest.xml");
   }
 
 
@@ -132,8 +133,79 @@ public class TestFiltering extends SolrT
   }
 
   static String f = "val_i";
+  static String f_s = "val_s";
+  static String f_s(int i) {
+    return String.format(Locale.ROOT, "%05d", i);
+  }
+
+
+  String rangeStr(String field, boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {
+    String topLev="";
+    if (!cache || exclude) {
+      topLev = "{!" + (cache || random().nextBoolean() ? " cache=" + cache : "")
+          + (cost != 0 ? " cost=" + cost : "")
+          + ((exclude) ? " tag=t" : "") + "}";
+    }
+
+    String q = field + ":";
+    String q2 = q;
+
+    String lower1 = "[" + f_s(l);
+    String lower2 = l<=0 ? lower1 : ("{" + f_s(l-1));
+    String upper1 = f_s(u) + "]";
+    String upper2 = f_s(u+1) + "}";
+
+    if (random().nextBoolean()) {
+      q += lower1;
+      q2 += lower2;
+    } else {
+      q += lower2;
+      q2 += lower1;
+    }
+
+    q += " TO ";
+    q2 += " TO ";
+
+    if (random().nextBoolean()) {
+      q += upper1;
+      q2 += upper2;
+    } else {
+      q += upper2;
+      q2 += upper1;
+    }
+
+
+    // String q = field + ":[" + f_s(l) + " TO " + f_s(u) + "]";
+
+    if (negative) {
+      q = "-_query_:\"" + q + "\"";
+      // q = "-" + q; // TODO: need to be encapsulated for some reason?
+    } else {
+      if (random().nextBoolean()) {
+        // try some different query structures - important for testing different code paths
+        switch (random().nextInt(5)) {
+          case 0:
+            q = q + " OR id:RAND"+random().nextInt();
+            break;
+          case 1:
+            q = "id:RAND"+random().nextInt() + " OR " + q;
+            break;
+          case 2:
+            q = "*:* AND " + q;
+            break;
+          case 3:
+            q = q + " AND " + q2;
+            break;
+          case 4:
+            q = q + " OR " + q2;
+            break;
+        }
+      }
+    }
+    return topLev + q;
+  }
 
-  String frangeStr(boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {
+  String frangeStr(String field, boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {
 
     String topLev="";
     if (!cache || exclude) {
@@ -142,7 +214,7 @@ public class TestFiltering extends SolrT
         + ((exclude) ? " tag=t" : "");
     }
 
-    String ret = "{!frange v="+f+" l="+l+" u="+u;
+    String ret = "{!frange v="+field+" l="+l+" u="+u;
     if (negative) {
       ret = "-_query_:\"" + ret + "}\"";
       if (topLev.length()>0) {
@@ -165,7 +237,7 @@ public class TestFiltering extends SolrT
     FixedBitSet[] sets = facetQuery ? new FixedBitSet[]{model.facetQuery} :
         (exclude ? new FixedBitSet[]{model.answer, model.facetQuery} : new FixedBitSet[]{model.answer, model.multiSelect, model.facetQuery});
 
-    if (random().nextInt(100) < 50) {
+    if (random().nextInt(100) < 60) {
       // frange
       int l=0;
       int u=0;
@@ -201,7 +273,10 @@ public class TestFiltering extends SolrT
         }
       }
 
-      return frangeStr(!positive, l, u, cache, cost, exclude);
+      String whichField = random().nextBoolean() ? f : f_s;
+      return random().nextBoolean() ?
+           frangeStr(f, !positive, l, u, cache, cost, exclude)   // todo: frange doesn't work on the string field?
+         :  rangeStr(whichField, !positive, l, u, cache, cost, exclude);
     } else {
       // term or boolean query
       int numWords = FixedBitSet.bits2words(model.indexSize);
@@ -256,16 +331,17 @@ public class TestFiltering extends SolrT
     Model model = new Model();
 
     for (int iiter = 0; iiter<indexIter; iiter++) {
-      model.indexSize = random().nextInt(20 * RANDOM_MULTIPLIER) + 1;
+      model.indexSize = random().nextInt(40 * RANDOM_MULTIPLIER) + 1;
       clearIndex();
 
       for (int i=0; i<model.indexSize; i++) {
         String val = Integer.toString(i);
 
-        assertU(adoc("id",val,f,val));
+        SolrInputDocument doc = sdoc("id", val, f,val, f_s, f_s(i) );
+        updateJ(jsonAdd(doc), null);
         if (random().nextInt(100) < 20) {
           // duplicate doc 20% of the time (makes deletions)
-          assertU(adoc("id",val,f,val));
+          updateJ(jsonAdd(doc), null);
         }
         if (random().nextInt(100) < 10) {
           // commit 10% of the time (forces a new segment)