You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/01/27 12:11:24 UTC
svn commit: r1561634 - in /lucene/dev/branches/lucene5376/lucene:
analysis/common/src/test/org/apache/lucene/analysis/core/
facet/src/java/org/apache/lucene/facet/
facet/src/test/org/apache/lucene/facet/
server/src/java/org/apache/lucene/server/
Author: mikemccand
Date: Mon Jan 27 11:11:23 2014
New Revision: 1561634
URL: http://svn.apache.org/r1561634
Log:
simplify drill sideways; add nocommits
Removed:
lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysCollector.java
Modified:
lucene/dev/branches/lucene5376/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java
lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java
lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java
lucene/dev/branches/lucene5376/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/IndexState.java
Modified: lucene/dev/branches/lucene5376/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java?rev=1561634&r1=1561633&r2=1561634&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java (original)
+++ lucene/dev/branches/lucene5376/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java Mon Jan 27 11:11:23 2014
@@ -104,6 +104,7 @@ public class TestAllAnalyzersHaveFactori
List<Class<?>> analysisClasses = TestRandomChains.getClassesForPackage("org.apache.lucene.analysis");
for (final Class<?> c : analysisClasses) {
+ // nocommit can we avoid deprecated components...
final int modifiers = c.getModifiers();
if (
// don't waste time with abstract classes
@@ -133,7 +134,10 @@ public class TestAllAnalyzersHaveFactori
}
assertSame(c, instance.create().getClass());
} catch (IllegalArgumentException e) {
- if (!e.getMessage().contains("SPI")) {
+ // nocommit does this really catch a missing
+ // factory!? the exc message on a missing factory
+ // contains SPI ...
+ if (!e.getMessage().contains("missing parameter") && !e.getMessage().contains("SPI")) {
throw e;
}
// TODO: For now pass because some factories have not yet a default config that always works
@@ -155,7 +159,10 @@ public class TestAllAnalyzersHaveFactori
assertSame(c, createdClazz);
}
} catch (IllegalArgumentException e) {
- if (!e.getMessage().contains("SPI")) {
+ // nocommit does this really catch a missing
+ // factory!? the exc message on a missing factory
+ // contains SPI ...
+ if (!e.getMessage().contains("missing parameter") && !e.getMessage().contains("SPI")) {
throw e;
}
// TODO: For now pass because some factories have not yet a default config that always works
@@ -177,7 +184,10 @@ public class TestAllAnalyzersHaveFactori
assertSame(c, createdClazz);
}
} catch (IllegalArgumentException e) {
- if (!e.getMessage().contains("SPI")) {
+ // nocommit does this really catch a missing
+ // factory!? the exc message on a missing factory
+ // contains SPI ...
+ if (!e.getMessage().contains("missing parameter") && !e.getMessage().contains("SPI")) {
throw e;
}
// TODO: For now pass because some factories have not yet a default config that always works
Modified: lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java?rev=1561634&r1=1561633&r2=1561634&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java (original)
+++ lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSideways.java Mon Jan 27 11:11:23 2014
@@ -26,12 +26,9 @@ import org.apache.lucene.facet.sortedset
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
@@ -40,11 +37,9 @@ import org.apache.lucene.search.MultiCol
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
-import org.apache.lucene.search.Weight;
/**
* Computes drill down and sideways counts for the provided
@@ -172,144 +167,14 @@ public class DrillSideways {
drillSidewaysCollectors[i] = new FacetsCollector();
}
- boolean useCollectorMethod = scoreSubDocsAtOnce();
-
- Term[][] drillDownTerms = null;
-
- if (!useCollectorMethod) {
- // Optimistic: assume subQueries of the DDQ are either
- // TermQuery or BQ OR of TermQuery; if this is wrong
- // then we detect it and fallback to the more general
- // but slower DrillSidewaysCollector:
- drillDownTerms = new Term[clauses.length-startClause][];
- for(int i=startClause;i<clauses.length;i++) {
- Query q = clauses[i].getQuery();
-
- // DrillDownQuery always wraps each subQuery in
- // ConstantScoreQuery:
- assert q instanceof ConstantScoreQuery;
-
- q = ((ConstantScoreQuery) q).getQuery();
-
- if (q instanceof TermQuery) {
- drillDownTerms[i-startClause] = new Term[] {((TermQuery) q).getTerm()};
- } else if (q instanceof BooleanQuery) {
- BooleanQuery q2 = (BooleanQuery) q;
- BooleanClause[] clauses2 = q2.getClauses();
- drillDownTerms[i-startClause] = new Term[clauses2.length];
- for(int j=0;j<clauses2.length;j++) {
- if (clauses2[j].getQuery() instanceof TermQuery) {
- drillDownTerms[i-startClause][j] = ((TermQuery) clauses2[j].getQuery()).getTerm();
- } else {
- useCollectorMethod = true;
- break;
- }
- }
- } else {
- useCollectorMethod = true;
- }
- }
- }
-
- if (useCollectorMethod) {
- // TODO: maybe we could push the "collector method"
- // down into the optimized scorer to have a tighter
- // integration ... and so TermQuery clauses could
- // continue to run "optimized"
- collectorMethod(query, baseQuery, startClause, hitCollector, drillDownCollector, drillSidewaysCollectors);
- } else {
- DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
- searcher.search(dsq, hitCollector);
- }
-
- return new DrillSidewaysResult(buildFacetsResult(drillDownCollector, drillSidewaysCollectors, drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null);
- }
-
- /** Uses the more general but slower method of sideways
- * counting. This method allows an arbitrary subQuery to
- * implement the drill down for a given dimension. */
- private void collectorMethod(DrillDownQuery ddq, Query baseQuery, int startClause, Collector hitCollector, Collector drillDownCollector, Collector[] drillSidewaysCollectors) throws IOException {
-
- BooleanClause[] clauses = ddq.getBooleanQuery().getClauses();
-
- Map<String,Integer> drillDownDims = ddq.getDims();
-
- BooleanQuery topQuery = new BooleanQuery(true);
- final DrillSidewaysCollector collector = new DrillSidewaysCollector(hitCollector, drillDownCollector, drillSidewaysCollectors,
- drillDownDims);
-
- // TODO: if query is already a BQ we could copy that and
- // add clauses to it, instead of doing BQ inside BQ
- // (should be more efficient)? Problem is this can
- // affect scoring (coord) ... too bad we can't disable
- // coord on a clause by clause basis:
- topQuery.add(baseQuery, BooleanClause.Occur.MUST);
-
- // NOTE: in theory we could just make a single BQ, with
- // +query a b c minShouldMatch=2, but in this case,
- // annoyingly, BS2 wraps a sub-scorer that always
- // returns 2 as the .freq(), not how many of the
- // SHOULD clauses matched:
- BooleanQuery subQuery = new BooleanQuery(true);
-
- Query wrappedSubQuery = new QueryWrapper(subQuery,
- new SetWeight() {
- @Override
- public void set(Weight w) {
- collector.setWeight(w, -1);
- }
- });
- Query constantScoreSubQuery = new ConstantScoreQuery(wrappedSubQuery);
-
- // Don't impact score of original query:
- constantScoreSubQuery.setBoost(0.0f);
-
- topQuery.add(constantScoreSubQuery, BooleanClause.Occur.MUST);
-
- // Unfortunately this sub-BooleanQuery
- // will never get BS1 because today BS1 only works
- // if topScorer=true... and actually we cannot use BS1
- // anyways because we need subDocsScoredAtOnce:
- int dimIndex = 0;
+ Query[] drillDownQueries = new Query[clauses.length-startClause];
for(int i=startClause;i<clauses.length;i++) {
- Query q = clauses[i].getQuery();
- // DrillDownQuery always wraps each subQuery in
- // ConstantScoreQuery:
- assert q instanceof ConstantScoreQuery;
- q = ((ConstantScoreQuery) q).getQuery();
-
- final int finalDimIndex = dimIndex;
- subQuery.add(new QueryWrapper(q,
- new SetWeight() {
- @Override
- public void set(Weight w) {
- collector.setWeight(w, finalDimIndex);
- }
- }),
- BooleanClause.Occur.SHOULD);
- dimIndex++;
+ drillDownQueries[i-startClause] = clauses[i].getQuery();
}
+ DrillSidewaysQuery dsq = new DrillSidewaysQuery(baseQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce());
+ searcher.search(dsq, hitCollector);
- // TODO: we could better optimize the "just one drill
- // down" case w/ a separate [specialized]
- // collector...
- int minShouldMatch = drillDownDims.size()-1;
- if (minShouldMatch == 0) {
- // Must add another "fake" clause so BQ doesn't erase
- // itself by rewriting to the single clause:
- Query end = new MatchAllDocsQuery();
- end.setBoost(0.0f);
- subQuery.add(end, BooleanClause.Occur.SHOULD);
- minShouldMatch++;
- }
-
- subQuery.setMinimumNumberShouldMatch(minShouldMatch);
-
- // System.out.println("EXE " + topQuery);
-
- // Collects against the passed-in
- // drillDown/SidewaysCollectors as a side effect:
- searcher.search(topQuery, collector);
+ return new DrillSidewaysResult(buildFacetsResult(drillDownCollector, drillSidewaysCollectors, drillDownDims.keySet().toArray(new String[drillDownDims.size()])), null);
}
/**
@@ -367,14 +232,15 @@ public class DrillSideways {
}
/** Override this and return true if your collector
- * (e.g., ToParentBlockJoinCollector) expects all
+ * (e.g., {@code ToParentBlockJoinCollector}) expects all
* sub-scorers to be positioned on the document being
* collected. This will cause some performance loss;
* default is false. Note that if you return true from
* this method (in a subclass) be sure your collector
* also returns false from {@link
* Collector#acceptsDocsOutOfOrder}: this will trick
- * BooleanQuery into also scoring all subDocs at once. */
+ * {@code BooleanQuery} into also scoring all subDocs at
+ * once. */
protected boolean scoreSubDocsAtOnce() {
return false;
}
@@ -394,54 +260,5 @@ public class DrillSideways {
this.hits = hits;
}
}
- private interface SetWeight {
- public void set(Weight w);
- }
-
- /** Just records which Weight was given out for the
- * (possibly rewritten) Query. */
- private static class QueryWrapper extends Query {
- private final Query originalQuery;
- private final SetWeight setter;
-
- public QueryWrapper(Query originalQuery, SetWeight setter) {
- this.originalQuery = originalQuery;
- this.setter = setter;
- }
-
- @Override
- public Weight createWeight(final IndexSearcher searcher) throws IOException {
- Weight w = originalQuery.createWeight(searcher);
- setter.set(w);
- return w;
- }
-
- @Override
- public Query rewrite(IndexReader reader) throws IOException {
- Query rewritten = originalQuery.rewrite(reader);
- if (rewritten != originalQuery) {
- return new QueryWrapper(rewritten, setter);
- } else {
- return this;
- }
- }
-
- @Override
- public String toString(String s) {
- return originalQuery.toString(s);
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof QueryWrapper)) return false;
- final QueryWrapper other = (QueryWrapper) o;
- return super.equals(o) && originalQuery.equals(other.originalQuery);
- }
-
- @Override
- public int hashCode() {
- return super.hashCode() * 31 + originalQuery.hashCode();
- }
- }
}
Modified: lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java?rev=1561634&r1=1561633&r2=1561634&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java (original)
+++ lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysQuery.java Mon Jan 27 11:11:23 2014
@@ -19,13 +19,8 @@ package org.apache.lucene.facet;
import java.io.IOException;
import java.util.Arrays;
-import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
@@ -42,13 +37,15 @@ class DrillSidewaysQuery extends Query {
final Query baseQuery;
final Collector drillDownCollector;
final Collector[] drillSidewaysCollectors;
- final Term[][] drillDownTerms;
+ final Query[] drillDownQueries;
+ final boolean scoreSubDocsAtOnce;
- DrillSidewaysQuery(Query baseQuery, Collector drillDownCollector, Collector[] drillSidewaysCollectors, Term[][] drillDownTerms) {
+ DrillSidewaysQuery(Query baseQuery, Collector drillDownCollector, Collector[] drillSidewaysCollectors, Query[] drillDownQueries, boolean scoreSubDocsAtOnce) {
this.baseQuery = baseQuery;
this.drillDownCollector = drillDownCollector;
this.drillSidewaysCollectors = drillSidewaysCollectors;
- this.drillDownTerms = drillDownTerms;
+ this.drillDownQueries = drillDownQueries;
+ this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
}
@Override
@@ -69,13 +66,17 @@ class DrillSidewaysQuery extends Query {
if (newQuery == baseQuery) {
return this;
} else {
- return new DrillSidewaysQuery(newQuery, drillDownCollector, drillSidewaysCollectors, drillDownTerms);
+ return new DrillSidewaysQuery(newQuery, drillDownCollector, drillSidewaysCollectors, drillDownQueries, scoreSubDocsAtOnce);
}
}
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
final Weight baseWeight = baseQuery.createWeight(searcher);
+ final Weight[] drillDownWeights = new Weight[drillDownQueries.length];
+ for(int dim=0;dim<drillDownQueries.length;dim++) {
+ drillDownWeights[dim] = searcher.rewrite(drillDownQueries[dim]).createWeight(searcher);
+ }
return new Weight() {
@Override
@@ -109,39 +110,17 @@ class DrillSidewaysQuery extends Query {
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
boolean topScorer, Bits acceptDocs) throws IOException {
- DrillSidewaysScorer.DocsAndCost[] dims = new DrillSidewaysScorer.DocsAndCost[drillDownTerms.length];
- TermsEnum termsEnum = null;
- String lastField = null;
+ DrillSidewaysScorer.DocsAndCost[] dims = new DrillSidewaysScorer.DocsAndCost[drillDownWeights.length];
int nullCount = 0;
for(int dim=0;dim<dims.length;dim++) {
dims[dim] = new DrillSidewaysScorer.DocsAndCost();
dims[dim].sidewaysCollector = drillSidewaysCollectors[dim];
- String field = drillDownTerms[dim][0].field();
- dims[dim].dim = drillDownTerms[dim][0].text();
- if (lastField == null || !lastField.equals(field)) {
- AtomicReader reader = context.reader();
- Terms terms = reader.terms(field);
- if (terms != null) {
- termsEnum = terms.iterator(null);
- } else {
- termsEnum = null;
- }
- lastField = field;
- }
- dims[dim].disis = new DocIdSetIterator[drillDownTerms[dim].length];
- if (termsEnum == null) {
+ DocIdSetIterator disi = drillDownWeights[dim].scorer(context, true, false, null);
+ if (disi == null) {
nullCount++;
continue;
}
- for(int i=0;i<drillDownTerms[dim].length;i++) {
- if (termsEnum.seekExact(drillDownTerms[dim][i].bytes())) {
- DocIdSetIterator disi = termsEnum.docs(null, null, 0);
- if (disi != null) {
- dims[dim].disis[i] = disi;
- dims[dim].maxCost = Math.max(dims[dim].maxCost, disi.cost());
- }
- }
- }
+ dims[dim].disi = disi;
}
if (nullCount > 1 || (nullCount == 1 && dims.length == 1)) {
@@ -160,8 +139,9 @@ class DrillSidewaysQuery extends Query {
}
return new DrillSidewaysScorer(this, context,
- baseScorer,
- drillDownCollector, dims);
+ baseScorer,
+ drillDownCollector, dims,
+ scoreSubDocsAtOnce);
}
};
}
@@ -175,7 +155,7 @@ class DrillSidewaysQuery extends Query {
result = prime * result + ((baseQuery == null) ? 0 : baseQuery.hashCode());
result = prime * result
+ ((drillDownCollector == null) ? 0 : drillDownCollector.hashCode());
- result = prime * result + Arrays.hashCode(drillDownTerms);
+ result = prime * result + Arrays.hashCode(drillDownQueries);
result = prime * result + Arrays.hashCode(drillSidewaysCollectors);
return result;
}
@@ -192,7 +172,7 @@ class DrillSidewaysQuery extends Query {
if (drillDownCollector == null) {
if (other.drillDownCollector != null) return false;
} else if (!drillDownCollector.equals(other.drillDownCollector)) return false;
- if (!Arrays.equals(drillDownTerms, other.drillDownTerms)) return false;
+ if (!Arrays.equals(drillDownQueries, other.drillDownQueries)) return false;
if (!Arrays.equals(drillSidewaysCollectors, other.drillSidewaysCollectors)) return false;
return true;
}
Modified: lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java?rev=1561634&r1=1561633&r2=1561634&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java (original)
+++ lucene/dev/branches/lucene5376/lucene/facet/src/java/org/apache/lucene/facet/DrillSidewaysScorer.java Mon Jan 27 11:11:23 2014
@@ -22,7 +22,6 @@ import java.util.Collection;
import java.util.Collections;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
@@ -42,6 +41,8 @@ class DrillSidewaysScorer extends Scorer
private final AtomicReaderContext context;
+ final boolean scoreSubDocsAtOnce;
+
private static final int CHUNK = 2048;
private static final int MASK = CHUNK-1;
@@ -49,12 +50,13 @@ class DrillSidewaysScorer extends Scorer
private float collectScore;
DrillSidewaysScorer(Weight w, AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector,
- DocsAndCost[] dims) {
+ DocsAndCost[] dims, boolean scoreSubDocsAtOnce) {
super(w);
this.dims = dims;
this.context = context;
this.baseScorer = baseScorer;
this.drillDownCollector = drillDownCollector;
+ this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
}
@Override
@@ -81,26 +83,19 @@ class DrillSidewaysScorer extends Scorer
// Position all scorers to their first matching doc:
baseScorer.nextDoc();
for (DocsAndCost dim : dims) {
- for (DocIdSetIterator disi : dim.disis) {
- if (disi != null) {
- disi.nextDoc();
- }
- }
+ dim.disi.nextDoc();
}
final int numDims = dims.length;
- DocIdSetIterator[][] disis = new DocIdSetIterator[numDims][];
+ DocIdSetIterator[] disis = new DocIdSetIterator[numDims];
Collector[] sidewaysCollectors = new Collector[numDims];
long drillDownCost = 0;
for (int dim=0;dim<numDims;dim++) {
- disis[dim] = dims[dim].disis;
+ DocIdSetIterator disi = dims[dim].disi;
+ disis[dim] = disi;
sidewaysCollectors[dim] = dims[dim].sidewaysCollector;
- for (DocIdSetIterator disi : dims[dim].disis) {
- if (disi != null) {
- drillDownCost += disi.cost();
- }
- }
+ drillDownCost += disi.cost();
}
long baseQueryCost = baseScorer.cost();
@@ -115,10 +110,10 @@ class DrillSidewaysScorer extends Scorer
}
*/
- if (baseQueryCost < drillDownCost/10) {
+ if (scoreSubDocsAtOnce || baseQueryCost < drillDownCost/10) {
//System.out.println("baseAdvance");
doBaseAdvanceScoring(collector, disis, sidewaysCollectors);
- } else if (numDims > 1 && (dims[1].maxCost < baseQueryCost/10)) {
+ } else if (numDims > 1 && dims[1].disi.cost() < baseQueryCost/10) {
//System.out.println("drillDownAdvance");
doDrillDownAdvanceScoring(collector, disis, sidewaysCollectors);
} else {
@@ -127,9 +122,62 @@ class DrillSidewaysScorer extends Scorer
}
}
+ /** Used when base query is highly constraining vs the
+ * drilldowns, or when the docs must be scored at once
+ * (i.e., like BooleanScorer2, not BooleanScorer). In
+ * this case we just .next() on base and .advance() on
+ * the dim filters. */
+ private void doBaseAdvanceScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors) throws IOException {
+ //if (DEBUG) {
+ // System.out.println(" doBaseAdvanceScoring");
+ //}
+ int docID = baseScorer.docID();
+
+ final int numDims = dims.length;
+
+ nextDoc: while (docID != NO_MORE_DOCS) {
+ int failedDim = -1;
+ for (int dim=0;dim<numDims;dim++) {
+ // TODO: should we sort this 2nd dimension of
+ // docsEnums from most frequent to least?
+ DocIdSetIterator disi = disis[dim];
+ if (disi.docID() < docID) {
+ disi.advance(docID);
+ }
+ if (disi.docID() > docID) {
+ if (failedDim != -1) {
+ // More than one dim fails on this document, so
+ // it's neither a hit nor a near-miss; move to
+ // next doc:
+ docID = baseScorer.nextDoc();
+ continue nextDoc;
+ } else {
+ failedDim = dim;
+ }
+ }
+ }
+
+ collectDocID = docID;
+
+ // TODO: we could score on demand instead since we are
+ // daat here:
+ collectScore = baseScorer.score();
+
+ if (failedDim == -1) {
+ // Hit passed all filters, so it's "real":
+ collectHit(collector, sidewaysCollectors);
+ } else {
+ // Hit missed exactly one filter:
+ collectNearMiss(sidewaysCollectors, failedDim);
+ }
+
+ docID = baseScorer.nextDoc();
+ }
+ }
+
/** Used when drill downs are highly constraining vs
* baseQuery. */
- private void doDrillDownAdvanceScoring(Collector collector, DocIdSetIterator[][] disis, Collector[] sidewaysCollectors) throws IOException {
+ private void doDrillDownAdvanceScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors) throws IOException {
final int maxDoc = context.reader().maxDoc();
final int numDims = dims.length;
@@ -158,69 +206,61 @@ class DrillSidewaysScorer extends Scorer
//if (DEBUG) {
// System.out.println(" dim0");
//}
- for (DocIdSetIterator disi : disis[0]) {
- if (disi == null) {
- continue;
- }
- int docID = disi.docID();
- while (docID < nextChunkStart) {
- int slot = docID & MASK;
-
- if (docIDs[slot] != docID) {
- seen.set(slot);
- // Mark slot as valid:
- //if (DEBUG) {
- // System.out.println(" set docID=" + docID + " id=" + context.reader().document(docID).get("id"));
- //}
- docIDs[slot] = docID;
- missingDims[slot] = 1;
- counts[slot] = 1;
- }
+ DocIdSetIterator disi = disis[0];
+ int docID = disi.docID();
+ while (docID < nextChunkStart) {
+ int slot = docID & MASK;
- docID = disi.nextDoc();
+ if (docIDs[slot] != docID) {
+ seen.set(slot);
+ // Mark slot as valid:
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " id=" + context.reader().document(docID).get("id"));
+ //}
+ docIDs[slot] = docID;
+ missingDims[slot] = 1;
+ counts[slot] = 1;
}
+
+ docID = disi.nextDoc();
}
// Second dim:
//if (DEBUG) {
// System.out.println(" dim1");
//}
- for (DocIdSetIterator disi : disis[1]) {
- if (disi == null) {
- continue;
- }
- int docID = disi.docID();
- while (docID < nextChunkStart) {
- int slot = docID & MASK;
+ disi = disis[1];
+ docID = disi.docID();
+ while (docID < nextChunkStart) {
+ int slot = docID & MASK;
- if (docIDs[slot] != docID) {
- // Mark slot as valid:
- seen.set(slot);
+ if (docIDs[slot] != docID) {
+ // Mark slot as valid:
+ seen.set(slot);
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id"));
+ //}
+ docIDs[slot] = docID;
+ missingDims[slot] = 0;
+ counts[slot] = 1;
+ } else {
+ // TODO: single-valued dims will always be true
+ // below; we could somehow specialize
+ if (missingDims[slot] >= 1) {
+ missingDims[slot] = 2;
+ counts[slot] = 2;
//if (DEBUG) {
- // System.out.println(" set docID=" + docID + " missingDim=0 id=" + context.reader().document(docID).get("id"));
+ // System.out.println(" set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
//}
- docIDs[slot] = docID;
- missingDims[slot] = 0;
- counts[slot] = 1;
} else {
- // TODO: single-valued dims will always be true
- // below; we could somehow specialize
- if (missingDims[slot] >= 1) {
- missingDims[slot] = 2;
- counts[slot] = 2;
- //if (DEBUG) {
- // System.out.println(" set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
- //}
- } else {
- counts[slot] = 1;
- //if (DEBUG) {
- // System.out.println(" set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
- //}
- }
+ counts[slot] = 1;
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
+ //}
}
-
- docID = disi.nextDoc();
}
+
+ docID = disi.nextDoc();
}
// After this we can "upgrade" to conjunction, because
@@ -277,32 +317,29 @@ class DrillSidewaysScorer extends Scorer
//if (DEBUG) {
// System.out.println(" dim=" + dim + " [" + dims[dim].dim + "]");
//}
- for (DocIdSetIterator disi : disis[dim]) {
- if (disi == null) {
- continue;
- }
- int docID = disi.docID();
- while (docID < nextChunkStart) {
- int slot = docID & MASK;
- if (docIDs[slot] == docID && counts[slot] >= dim) {
- // TODO: single-valued dims will always be true
- // below; we could somehow specialize
- if (missingDims[slot] >= dim) {
- //if (DEBUG) {
- // System.out.println(" set docID=" + docID + " count=" + (dim+2));
- //}
- missingDims[slot] = dim+1;
- counts[slot] = dim+2;
- } else {
- //if (DEBUG) {
- // System.out.println(" set docID=" + docID + " missing count=" + (dim+1));
- //}
- counts[slot] = dim+1;
- }
+ disi = disis[dim];
+ docID = disi.docID();
+ while (docID < nextChunkStart) {
+ int slot = docID & MASK;
+ if (docIDs[slot] == docID && counts[slot] >= dim) {
+ // TODO: single-valued dims will always be true
+ // below; we could somehow specialize
+ if (missingDims[slot] >= dim) {
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " count=" + (dim+2));
+ //}
+ missingDims[slot] = dim+1;
+ counts[slot] = dim+2;
+ } else {
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " missing count=" + (dim+1));
+ //}
+ counts[slot] = dim+1;
}
- // TODO: sometimes use advance?
- docID = disi.nextDoc();
}
+
+ // TODO: sometimes use advance?
+ docID = disi.nextDoc();
}
}
@@ -332,92 +369,7 @@ class DrillSidewaysScorer extends Scorer
}
}
- /** Used when base query is highly constraining vs the
- * drilldowns; in this case we just .next() on base and
- * .advance() on the dims. */
- private void doBaseAdvanceScoring(Collector collector, DocIdSetIterator[][] disis, Collector[] sidewaysCollectors) throws IOException {
- //if (DEBUG) {
- // System.out.println(" doBaseAdvanceScoring");
- //}
- int docID = baseScorer.docID();
-
- final int numDims = dims.length;
-
- nextDoc: while (docID != NO_MORE_DOCS) {
- int failedDim = -1;
- for (int dim=0;dim<numDims;dim++) {
- // TODO: should we sort this 2nd dimension of
- // docsEnums from most frequent to least?
- boolean found = false;
- for (DocIdSetIterator disi : disis[dim]) {
- if (disi == null) {
- continue;
- }
- if (disi.docID() < docID) {
- disi.advance(docID);
- }
- if (disi.docID() == docID) {
- found = true;
- break;
- }
- }
- if (!found) {
- if (failedDim != -1) {
- // More than one dim fails on this document, so
- // it's neither a hit nor a near-miss; move to
- // next doc:
- docID = baseScorer.nextDoc();
- continue nextDoc;
- } else {
- failedDim = dim;
- }
- }
- }
-
- collectDocID = docID;
-
- // TODO: we could score on demand instead since we are
- // daat here:
- collectScore = baseScorer.score();
-
- if (failedDim == -1) {
- collectHit(collector, sidewaysCollectors);
- } else {
- collectNearMiss(sidewaysCollectors, failedDim);
- }
-
- docID = baseScorer.nextDoc();
- }
- }
-
- private void collectHit(Collector collector, Collector[] sidewaysCollectors) throws IOException {
- //if (DEBUG) {
- // System.out.println(" hit");
- //}
-
- collector.collect(collectDocID);
- if (drillDownCollector != null) {
- drillDownCollector.collect(collectDocID);
- }
-
- // TODO: we could "fix" faceting of the sideways counts
- // to do this "union" (of the drill down hits) in the
- // end instead:
-
- // Tally sideways counts:
- for (int dim=0;dim<sidewaysCollectors.length;dim++) {
- sidewaysCollectors[dim].collect(collectDocID);
- }
- }
-
- private void collectNearMiss(Collector[] sidewaysCollectors, int dim) throws IOException {
- //if (DEBUG) {
- // System.out.println(" missingDim=" + dim);
- //}
- sidewaysCollectors[dim].collect(collectDocID);
- }
-
- private void doUnionScoring(Collector collector, DocIdSetIterator[][] disis, Collector[] sidewaysCollectors) throws IOException {
+ private void doUnionScoring(Collector collector, DocIdSetIterator[] disis, Collector[] sidewaysCollectors) throws IOException {
//if (DEBUG) {
// System.out.println(" doUnionScoring");
//}
@@ -479,91 +431,52 @@ class DrillSidewaysScorer extends Scorer
//if (DEBUG) {
// System.out.println(" dim=0 [" + dims[0].dim + "]");
//}
- for (DocIdSetIterator disi : disis[0]) {
- if (disi == null) {
- continue;
- }
- docID = disi.docID();
- //if (DEBUG) {
- // System.out.println(" start docID=" + docID);
- //}
- while (docID < nextChunkStart) {
- int slot = docID & MASK;
- if (docIDs[slot] == docID) {
- //if (DEBUG) {
- // System.out.println(" set docID=" + docID + " count=2");
- //}
- missingDims[slot] = 1;
- counts[slot] = 2;
- }
- docID = disi.nextDoc();
+ DocIdSetIterator disi = disis[0];
+ docID = disi.docID();
+ //if (DEBUG) {
+ // System.out.println(" start docID=" + docID);
+ //}
+ while (docID < nextChunkStart) {
+ int slot = docID & MASK;
+ if (docIDs[slot] == docID) {
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " count=2");
+ //}
+ missingDims[slot] = 1;
+ counts[slot] = 2;
}
+ docID = disi.nextDoc();
}
for (int dim=1;dim<numDims;dim++) {
//if (DEBUG) {
// System.out.println(" dim=" + dim + " [" + dims[dim].dim + "]");
//}
- for (DocIdSetIterator disi : disis[dim]) {
- if (disi == null) {
- continue;
- }
- docID = disi.docID();
- //if (DEBUG) {
- // System.out.println(" start docID=" + docID);
- //}
- while (docID < nextChunkStart) {
- int slot = docID & MASK;
- if (docIDs[slot] == docID && counts[slot] >= dim) {
- // This doc is still in the running...
- // TODO: single-valued dims will always be true
- // below; we could somehow specialize
- if (missingDims[slot] >= dim) {
- //if (DEBUG) {
- // System.out.println(" set docID=" + docID + " count=" + (dim+2));
- //}
- missingDims[slot] = dim+1;
- counts[slot] = dim+2;
- } else {
- //if (DEBUG) {
- // System.out.println(" set docID=" + docID + " missing count=" + (dim+1));
- //}
- counts[slot] = dim+1;
- }
+ disi = disis[dim];
+ docID = disi.docID();
+ //if (DEBUG) {
+ // System.out.println(" start docID=" + docID);
+ //}
+ while (docID < nextChunkStart) {
+ int slot = docID & MASK;
+ if (docIDs[slot] == docID && counts[slot] >= dim) {
+ // This doc is still in the running...
+ // TODO: single-valued dims will always be true
+ // below; we could somehow specialize
+ if (missingDims[slot] >= dim) {
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " count=" + (dim+2));
+ //}
+ missingDims[slot] = dim+1;
+ counts[slot] = dim+2;
+ } else {
+ //if (DEBUG) {
+ // System.out.println(" set docID=" + docID + " missing count=" + (dim+1));
+ //}
+ counts[slot] = dim+1;
}
- docID = disi.nextDoc();
}
-
- // TODO: sometimes use advance?
-
- /*
- int docBase = nextChunkStart - CHUNK;
- for (int i=0;i<filledCount;i++) {
- int slot = filledSlots[i];
- docID = docBase + filledSlots[i];
- if (docIDs[slot] == docID && counts[slot] >= dim) {
- // This doc is still in the running...
- int ddDocID = docsEnum.docID();
- if (ddDocID < docID) {
- ddDocID = docsEnum.advance(docID);
- }
- if (ddDocID == docID) {
- if (missingDims[slot] >= dim && counts[slot] == allMatchCount) {
- //if (DEBUG) {
- // System.out.println(" set docID=" + docID + " count=" + (dim+2));
- // }
- missingDims[slot] = dim+1;
- counts[slot] = dim+2;
- } else {
- //if (DEBUG) {
- // System.out.println(" set docID=" + docID + " missing count=" + (dim+1));
- // }
- counts[slot] = dim+1;
- }
- }
- }
- }
- */
+ docID = disi.nextDoc();
}
}
@@ -599,6 +512,33 @@ class DrillSidewaysScorer extends Scorer
}
}
+ private void collectHit(Collector collector, Collector[] sidewaysCollectors) throws IOException {
+ //if (DEBUG) {
+ // System.out.println(" hit");
+ //}
+
+ collector.collect(collectDocID);
+ if (drillDownCollector != null) {
+ drillDownCollector.collect(collectDocID);
+ }
+
+ // TODO: we could "fix" faceting of the sideways counts
+ // to do this "union" (of the drill down hits) in the
+ // end instead:
+
+ // Tally sideways counts:
+ for (int dim=0;dim<sidewaysCollectors.length;dim++) {
+ sidewaysCollectors[dim].collect(collectDocID);
+ }
+ }
+
+ private void collectNearMiss(Collector[] sidewaysCollectors, int dim) throws IOException {
+ //if (DEBUG) {
+ // System.out.println(" missingDim=" + dim);
+ //}
+ sidewaysCollectors[dim].collect(collectDocID);
+ }
+
@Override
public int docID() {
return collectDocID;
@@ -635,17 +575,16 @@ class DrillSidewaysScorer extends Scorer
}
static class DocsAndCost implements Comparable<DocsAndCost> {
- DocIdSetIterator[] disis;
- // Max cost for all docsEnums for this dim:
- long maxCost;
+ // Docs matching this dim's filter:
+ DocIdSetIterator disi;
Collector sidewaysCollector;
String dim;
@Override
public int compareTo(DocsAndCost other) {
- if (maxCost < other.maxCost) {
+ if (disi.cost() < other.disi.cost()) {
return -1;
- } else if (maxCost > other.maxCost) {
+ } else if (disi.cost() > other.disi.cost()) {
return 1;
} else {
return 0;
Modified: lucene/dev/branches/lucene5376/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java?rev=1561634&r1=1561633&r2=1561634&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java (original)
+++ lucene/dev/branches/lucene5376/lucene/facet/src/test/org/apache/lucene/facet/TestDrillSideways.java Mon Jan 27 11:11:23 2014
@@ -644,7 +644,7 @@ public class TestDrillSideways extends F
final FixedBitSet bits = new FixedBitSet(maxDoc);
for(int docID=0;docID < maxDoc;docID++) {
// Keeps only the even ids:
- if ((acceptDocs == null || acceptDocs.get(docID)) && ((Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0)) {
+ if ((acceptDocs == null || acceptDocs.get(docID)) && (Integer.parseInt(context.reader().document(docID).get("id")) & 1) == 0) {
bits.set(docID);
}
}
@@ -688,7 +688,7 @@ public class TestDrillSideways extends F
// subScorers are on the same docID:
if (!anyMultiValuedDrillDowns) {
// Can only do this test when there are no OR'd
- // drill-down values, beacuse in that case it's
+ // drill-down values, because in that case it's
// easily possible for one of the DD terms to be on
// a future docID:
new DrillSideways(s, config, tr) {
Modified: lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/IndexState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/IndexState.java?rev=1561634&r1=1561633&r2=1561634&view=diff
==============================================================================
--- lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/IndexState.java (original)
+++ lucene/dev/branches/lucene5376/lucene/server/src/java/org/apache/lucene/server/IndexState.java Mon Jan 27 11:11:23 2014
@@ -362,6 +362,9 @@ public class IndexState implements Close
@Override
public Similarity get(String name) {
+ if (internalFacetFieldNames.contains(name)) {
+ return defaultSim;
+ }
return getField(name).sim;
}
};