You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/07/27 11:36:32 UTC
svn commit: r1692848 - in /lucene/dev/trunk/lucene: ./
core/src/java/org/apache/lucene/index/
core/src/java/org/apache/lucene/search/ core/src/java/org/apache/lucene/util/
core/src/test/org/apache/lucene/search/
queryparser/src/java/org/apache/lucene/q...
Author: jpountz
Date: Mon Jul 27 09:36:31 2015
New Revision: 1692848
URL: http://svn.apache.org/r1692848
Log:
LUCENE-6695: Added BlendedTermQuery.
Added:
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java (with props)
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestBlendedTermQuery.java (with props)
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1692848&r1=1692847&r2=1692848&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Jul 27 09:36:31 2015
@@ -144,6 +144,9 @@ New Features
* LUCENE-6694: Add LithuanianAnalyzer and LithuanianStemmer.
(Dainius Jocas via Robert Muir)
+* LUCENE-6695: Added a new BlendedTermQuery to blend statistics across several
+ terms. (Simon Willnauer, Adrien Grand)
+
API Changes
* LUCENE-6508: Simplify Lock api, there is now just
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java?rev=1692848&r1=1692847&r2=1692848&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/TermContext.java Mon Jul 27 09:36:31 2015
@@ -17,7 +17,6 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
Added: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java?rev=1692848&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java (added)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/BlendedTermQuery.java Mon Jul 27 09:36:31 2015
@@ -0,0 +1,320 @@
+package org.apache.lucene.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.TermState;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.InPlaceMergeSorter;
+
+/**
+ * A {@link Query} that blends index statistics across multiple terms.
+ * This is particularly useful when several terms should produce identical
+ * scores, regardless of their index statistics.
+ * <p>For instance imagine that you are resolving synonyms at search time,
+ * all terms should produce identical scores instead of the default behavior,
+ * which tends to give higher scores to rare terms.
+ * <p>An other useful use-case is cross-field search: imagine that you would
+ * like to search for {@code john} on two fields: {@code first_name} and
+ * {@code last_name}. You might not want to give a higher weight to matches
+ * on the field where {@code john} is rarer, in which case
+ * {@link BlendedTermQuery} would help as well.
+ * @lucene.experimental
+ */
+public final class BlendedTermQuery extends Query {
+
+ /** A Builder for {@link BlendedTermQuery}. */
+ public static class Builder {
+
+ private int numTerms = 0;
+ private Term[] terms = new Term[0];
+ private float[] boosts = new float[0];
+ private TermContext[] contexts = new TermContext[0];
+ private RewriteMethod rewriteMethod = DISJUNCTION_MAX_REWRITE;
+
+ /** Sole constructor. */
+ public Builder() {}
+
+ /** Set the {@link RewriteMethod}. Default is to use
+ * {@link BlendedTermQuery#DISJUNCTION_MAX_REWRITE}.
+ * @see RewriteMethod */
+ public Builder setRewriteMethod(RewriteMethod rewiteMethod) {
+ this.rewriteMethod = rewiteMethod;
+ return this;
+ }
+
+ /** Add a new {@link Term} to this builder, with a default boost of {@code 1}.
+ * @see #add(Term, float) */
+ public Builder add(Term term) {
+ return add(term, 1f);
+ }
+
+ /** Add a {@link Term} with the provided boost. The higher the boost, the
+ * more this term will contribute to the overall score of the
+ * {@link BlendedTermQuery}. */
+ public Builder add(Term term, float boost) {
+ return add(term, boost, null);
+ }
+
+ /**
+ * Expert: Add a {@link Term} with the provided boost and context.
+ * This method is useful if you already have a {@link TermContext}
+ * object constructed for the given term.
+ */
+ public Builder add(Term term, float boost, TermContext context) {
+ if (numTerms >= BooleanQuery.getMaxClauseCount()) {
+ throw new BooleanQuery.TooManyClauses();
+ }
+ terms = ArrayUtil.grow(terms, numTerms + 1);
+ boosts = ArrayUtil.grow(boosts, numTerms + 1);
+ contexts = ArrayUtil.grow(contexts, numTerms + 1);
+ terms[numTerms] = new Term(term.field(), BytesRef.deepCopyOf(term.bytes()));
+ boosts[numTerms] = boost;
+ contexts[numTerms] = context;
+ numTerms += 1;
+ return this;
+ }
+
+ /** Build the {@link BlendedTermQuery}. */
+ public BlendedTermQuery build() {
+ return new BlendedTermQuery(
+ Arrays.copyOf(terms, numTerms),
+ Arrays.copyOf(boosts, numTerms),
+ Arrays.copyOf(contexts, numTerms),
+ rewriteMethod);
+ }
+
+ }
+
+ /** A {@link RewriteMethod} defines how queries for individual terms should
+ * be merged.
+ * @lucene.experimental
+ * @see BlendedTermQuery#BOOLEAN_REWRITE
+ * @see BlendedTermQuery.DisjunctionMaxRewrite */
+ public static abstract class RewriteMethod {
+
+ /** Sole constructor */
+ protected RewriteMethod() {}
+
+ /** Merge the provided sub queries into a single {@link Query} object. */
+ public abstract Query rewrite(Query[] subQueries);
+
+ }
+
+ /**
+ * A {@link RewriteMethod} that adds all sub queries to a {@link BooleanQuery}
+ * which has {@link BooleanQuery#isCoordDisabled() coords disabled}. This
+ * {@link RewriteMethod} is useful when matching on several fields is
+ * considered better than having a good match on a single field.
+ */
+ public static final RewriteMethod BOOLEAN_REWRITE = new RewriteMethod() {
+ @Override
+ public Query rewrite(Query[] subQueries) {
+ BooleanQuery.Builder merged = new BooleanQuery.Builder();
+ merged.setDisableCoord(true);
+ for (Query query : subQueries) {
+ merged.add(query, Occur.SHOULD);
+ }
+ return merged.build();
+ }
+ };
+
+ /**
+ * A {@link RewriteMethod} that creates a {@link DisjunctionMaxQuery} out
+ * of the sub queries. This {@link RewriteMethod} is useful when having a
+ * good match on a single field is considered better than having average
+ * matches on several fields.
+ */
+ public static class DisjunctionMaxRewrite extends RewriteMethod {
+
+ private final float tieBreakerMultiplier;
+
+ /** This {@link RewriteMethod} will create {@link DisjunctionMaxQuery}
+ * instances that have the provided tie breaker.
+ * @see DisjunctionMaxQuery */
+ public DisjunctionMaxRewrite(float tieBreakerMultiplier) {
+ this.tieBreakerMultiplier = tieBreakerMultiplier;
+ }
+
+ @Override
+ public Query rewrite(Query[] subQueries) {
+ return new DisjunctionMaxQuery(Arrays.asList(subQueries), tieBreakerMultiplier);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == null || getClass() != obj.getClass()) {
+ return false;
+ }
+ DisjunctionMaxRewrite that = (DisjunctionMaxRewrite) obj;
+ return tieBreakerMultiplier == that.tieBreakerMultiplier;
+ }
+
+ @Override
+ public int hashCode() {
+ return 31 * getClass().hashCode() + Float.floatToIntBits(tieBreakerMultiplier);
+ }
+
+ }
+
+ /** {@link DisjunctionMaxRewrite} instance with a tie-breaker of {@code 0.01}. */
+ public static final RewriteMethod DISJUNCTION_MAX_REWRITE = new DisjunctionMaxRewrite(0.01f);
+
+ private final Term[] terms;
+ private final float[] boosts;
+ private final TermContext[] contexts;
+ private final RewriteMethod rewriteMethod;
+
+ private BlendedTermQuery(Term[] terms, float[] boosts, TermContext[] contexts,
+ RewriteMethod rewriteMethod) {
+ assert terms.length == boosts.length;
+ assert terms.length == contexts.length;
+ this.terms = terms;
+ this.boosts = boosts;
+ this.contexts = contexts;
+ this.rewriteMethod = rewriteMethod;
+
+ // we sort terms so that equals/hashcode does not rely on the order
+ new InPlaceMergeSorter() {
+
+ @Override
+ protected void swap(int i, int j) {
+ Term tmpTerm = terms[i];
+ terms[i] = terms[j];
+ terms[j] = tmpTerm;
+
+ TermContext tmpContext = contexts[i];
+ contexts[i] = contexts[j];
+ contexts[j] = tmpContext;
+
+ float tmpBoost = boosts[i];
+ boosts[i] = boosts[j];
+ boosts[j] = tmpBoost;
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ return terms[i].compareTo(terms[j]);
+ }
+ }.sort(0, terms.length);
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (super.equals(obj) == false) {
+ return false;
+ }
+ BlendedTermQuery that = (BlendedTermQuery) obj;
+ return Arrays.equals(terms, that.terms)
+ && Arrays.equals(contexts, that.contexts)
+ && Arrays.equals(boosts, that.boosts)
+ && rewriteMethod.equals(that.rewriteMethod);
+ }
+
+ @Override
+ public int hashCode() {
+ int h = super.hashCode();
+ h = 31 * h + Arrays.hashCode(terms);
+ h = 31 * h + Arrays.hashCode(contexts);
+ h = 31 * h + Arrays.hashCode(boosts);
+ h = 31 * h + rewriteMethod.hashCode();
+ return h;
+ }
+
+ @Override
+ public String toString(String field) {
+ StringBuilder builder = new StringBuilder("Blended(");
+ for (int i = 0; i < terms.length; ++i) {
+ if (i != 0) {
+ builder.append(" ");
+ }
+ TermQuery termQuery = new TermQuery(terms[i]);
+ termQuery.setBoost(boosts[i]);
+ builder.append(termQuery.toString(field));
+ }
+ builder.append(")");
+ return builder.toString();
+ }
+
+ @Override
+ public final Query rewrite(IndexReader reader) throws IOException {
+ final TermContext[] contexts = Arrays.copyOf(this.contexts, this.contexts.length);
+ for (int i = 0; i < contexts.length; ++i) {
+ if (contexts[i] == null || contexts[i].topReaderContext != reader.getContext()) {
+ contexts[i] = TermContext.build(reader.getContext(), terms[i]);
+ }
+ }
+
+ // Compute aggregated doc freq and total term freq
+ // df will be the max of all doc freqs
+ // ttf will be the sum of all total term freqs
+ int df = 0;
+ long ttf = 0;
+ for (TermContext ctx : contexts) {
+ df = Math.max(df, ctx.docFreq());
+ if (ctx.totalTermFreq() == -1L) {
+ ttf = -1L;
+ } else if (ttf != -1L) {
+ ttf += ctx.totalTermFreq();
+ }
+ }
+
+ for (int i = 0; i < contexts.length; ++i) {
+ contexts[i] = adjustFrequencies(contexts[i], df, ttf);
+ }
+
+ TermQuery[] termQueries = new TermQuery[terms.length];
+ for (int i = 0; i < terms.length; ++i) {
+ termQueries[i] = new TermQuery(terms[i], contexts[i]);
+ termQueries[i].setBoost(boosts[i]);
+ }
+ Query rewritten = rewriteMethod.rewrite(termQueries);
+ rewritten.setBoost(getBoost());
+ return rewritten;
+ }
+
+ private static TermContext adjustFrequencies(TermContext ctx, int artificialDf, long artificialTtf) {
+ List<LeafReaderContext> leaves = ctx.topReaderContext.leaves();
+ final int len;
+ if (leaves == null) {
+ len = 1;
+ } else {
+ len = leaves.size();
+ }
+ TermContext newCtx = new TermContext(ctx.topReaderContext);
+ for (int i = 0; i < len; ++i) {
+ TermState termState = ctx.get(i);
+ if (termState == null) {
+ continue;
+ }
+ newCtx.register(termState, i);
+ }
+ newCtx.accumulateStatistics(artificialDf, artificialTtf);
+ return newCtx;
+ }
+
+}
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java?rev=1692848&r1=1692847&r2=1692848&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/MultiTermQuery.java Mon Jul 27 09:36:31 2015
@@ -18,16 +18,13 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
-import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.FilteredTermsEnum; // javadocs
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SingleTermsEnum; // javadocs
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
-import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanQuery.Builder;
@@ -194,7 +191,7 @@ public abstract class MultiTermQuery ext
* @see #setRewriteMethod
*/
public static final class TopTermsBlendedFreqScoringRewrite extends
- TopTermsRewrite<BooleanQuery.Builder> {
+ TopTermsRewrite<BlendedTermQuery.Builder> {
/**
* Create a TopTermsBlendedScoringBooleanQueryRewrite for at most
@@ -213,77 +210,22 @@ public abstract class MultiTermQuery ext
}
@Override
- protected BooleanQuery.Builder getTopLevelBuilder() {
- BooleanQuery.Builder builder = new BooleanQuery.Builder();
- builder.setDisableCoord(true);
+ protected BlendedTermQuery.Builder getTopLevelBuilder() {
+ BlendedTermQuery.Builder builder = new BlendedTermQuery.Builder();
+ builder.setRewriteMethod(BlendedTermQuery.BOOLEAN_REWRITE);
return builder;
}
@Override
- protected Query build(Builder builder) {
+ protected Query build(BlendedTermQuery.Builder builder) {
return builder.build();
}
@Override
- protected void addClause(BooleanQuery.Builder topLevel, Term term, int docCount,
+ protected void addClause(BlendedTermQuery.Builder topLevel, Term term, int docCount,
float boost, TermContext states) {
- final TermQuery tq = new TermQuery(term, states);
- tq.setBoost(boost);
- topLevel.add(tq, BooleanClause.Occur.SHOULD);
- }
-
- @Override
- void adjustScoreTerms(IndexReader reader, ScoreTerm[] scoreTerms) {
- if (scoreTerms.length <= 1) {
- return;
- }
- int maxDf = 0;
- long maxTtf = 0;
- for (ScoreTerm scoreTerm : scoreTerms) {
- TermContext ctx = scoreTerm.termState;
- int df = ctx.docFreq();
- maxDf = Math.max(df, maxDf);
- long ttf = ctx.totalTermFreq();
- maxTtf = ttf == -1 || maxTtf == -1 ? -1 : Math.max(ttf, maxTtf);
- }
-
- assert maxDf >= 0 : "DF must be >= 0";
- if (maxDf == 0) {
- return; // we are done that term doesn't exist at all
- }
- assert (maxTtf == -1) || (maxTtf >= maxDf);
-
- for (int i = 0; i < scoreTerms.length; i++) {
- TermContext ctx = scoreTerms[i].termState;
- ctx = adjustFrequencies(ctx, maxDf, maxTtf);
-
- ScoreTerm adjustedScoreTerm = new ScoreTerm(ctx);
- adjustedScoreTerm.boost = scoreTerms[i].boost;
- adjustedScoreTerm.bytes.copyBytes(scoreTerms[i].bytes);
- scoreTerms[i] = adjustedScoreTerm;
- }
- }
- }
-
- private static TermContext adjustFrequencies(TermContext ctx, int artificialDf,
- long artificialTtf) {
- List<LeafReaderContext> leaves = ctx.topReaderContext.leaves();
- final int len;
- if (leaves == null) {
- len = 1;
- } else {
- len = leaves.size();
- }
- TermContext newCtx = new TermContext(ctx.topReaderContext);
- for (int i = 0; i < len; ++i) {
- TermState termState = ctx.get(i);
- if (termState == null) {
- continue;
- }
- newCtx.register(termState, i);
+ topLevel.add(term, boost, states);
}
- newCtx.accumulateStatistics(artificialDf, artificialTtf);
- return newCtx;
}
/**
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java?rev=1692848&r1=1692847&r2=1692848&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/TopTermsRewrite.java Mon Jul 27 09:36:31 2015
@@ -157,8 +157,6 @@ public abstract class TopTermsRewrite<B>
final B b = getTopLevelBuilder();
final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
ArrayUtil.timSort(scoreTerms, scoreTermSortByTermComp);
-
- adjustScoreTerms(reader, scoreTerms);
for (final ScoreTerm st : scoreTerms) {
final Term term = new Term(query.field, st.bytes.toBytesRef());
@@ -167,10 +165,6 @@ public abstract class TopTermsRewrite<B>
return build(b);
}
- void adjustScoreTerms(IndexReader reader, ScoreTerm[] scoreTerms) {
- //no-op but allows subclasses the ability to tweak the score terms used in ranking e.g. balancing IDF.
- }
-
@Override
public int hashCode() {
return 31 * size;
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java?rev=1692848&r1=1692847&r2=1692848&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java Mon Jul 27 09:36:31 2015
@@ -17,6 +17,7 @@ package org.apache.lucene.util;
* limitations under the License.
*/
+import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
@@ -236,6 +237,14 @@ public final class ArrayUtil {
return currentSize;
}
+ public static <T> T[] grow(T[] array, int minSize) {
+ assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?";
+ if (array.length < minSize) {
+ return Arrays.copyOf(array, oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF));
+ } else
+ return array;
+ }
+
public static short[] grow(short[] array, int minSize) {
assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?";
if (array.length < minSize) {
Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestBlendedTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestBlendedTermQuery.java?rev=1692848&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestBlendedTermQuery.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/TestBlendedTermQuery.java Mon Jul 27 09:36:31 2015
@@ -0,0 +1,122 @@
+package org.apache.lucene.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestBlendedTermQuery extends LuceneTestCase {
+
+ public void testEquals() {
+ Term t1 = new Term("foo", "bar");
+
+ BlendedTermQuery bt1 = new BlendedTermQuery.Builder()
+ .add(t1)
+ .build();
+ BlendedTermQuery bt2 = new BlendedTermQuery.Builder()
+ .add(t1)
+ .build();
+ QueryUtils.checkEqual(bt1, bt2);
+
+ bt1 = new BlendedTermQuery.Builder()
+ .setRewriteMethod(BlendedTermQuery.BOOLEAN_REWRITE)
+ .add(t1)
+ .build();
+ bt2 = new BlendedTermQuery.Builder()
+ .setRewriteMethod(BlendedTermQuery.DISJUNCTION_MAX_REWRITE)
+ .add(t1)
+ .build();
+ QueryUtils.checkUnequal(bt1, bt2);
+
+ Term t2 = new Term("foo", "baz");
+
+ bt1 = new BlendedTermQuery.Builder()
+ .add(t1)
+ .add(t2)
+ .build();
+ bt2 = new BlendedTermQuery.Builder()
+ .add(t2)
+ .add(t1)
+ .build();
+ QueryUtils.checkEqual(bt1, bt2);
+
+ float boost1 = random().nextFloat();
+ float boost2 = random().nextFloat();
+ bt1 = new BlendedTermQuery.Builder()
+ .add(t1, boost1)
+ .add(t2, boost2)
+ .build();
+ bt2 = new BlendedTermQuery.Builder()
+ .add(t2, boost2)
+ .add(t1, boost1)
+ .build();
+ QueryUtils.checkEqual(bt1, bt2);
+ }
+
+ public void testToString() {
+ assertEquals("Blended()", new BlendedTermQuery.Builder().build().toString());
+ Term t1 = new Term("foo", "bar");
+ assertEquals("Blended(foo:bar)", new BlendedTermQuery.Builder().add(t1).build().toString());
+ Term t2 = new Term("foo", "baz");
+ assertEquals("Blended(foo:bar foo:baz)", new BlendedTermQuery.Builder().add(t1).add(t2).build().toString());
+ assertEquals("Blended(foo:bar^4.0 foo:baz^3.0)", new BlendedTermQuery.Builder().add(t1, 4).add(t2, 3).build().toString());
+ }
+
+ public void testBlendedScores() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+
+ Document doc = new Document();
+ doc.add(new StringField("f", "a", Store.NO));
+ w.addDocument(doc);
+
+ doc = new Document();
+ doc.add(new StringField("f", "b", Store.NO));
+ for (int i = 0; i < 10; ++i) {
+ w.addDocument(doc);
+ }
+
+ IndexReader reader = w.getReader();
+ IndexSearcher searcher = newSearcher(reader);
+ BlendedTermQuery query = new BlendedTermQuery.Builder()
+ .setRewriteMethod(new BlendedTermQuery.DisjunctionMaxRewrite(0f))
+ .add(new Term("f", "a"))
+ .add(new Term("f", "b"))
+ .build();
+
+ TopDocs topDocs = searcher.search(query, 20);
+ assertEquals(11, topDocs.totalHits);
+ // All docs must have the same score
+ for (int i = 0; i < topDocs.scoreDocs.length; ++i) {
+ assertEquals(topDocs.scoreDocs[0].score, topDocs.scoreDocs[i].score, 0.0f);
+ }
+
+ reader.close();
+ w.close();
+ dir.close();
+ }
+
+}
Modified: lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java?rev=1692848&r1=1692847&r2=1692848&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java (original)
+++ lucene/dev/trunk/lucene/queryparser/src/java/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java Mon Jul 27 09:36:31 2015
@@ -19,7 +19,6 @@ package org.apache.lucene.queryparser.co
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Collection;
import java.util.Iterator;
import java.util.List;
@@ -30,6 +29,7 @@ import org.apache.lucene.queryparser.cla
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
@@ -273,7 +273,7 @@ public class ComplexPhraseQueryParser ex
// HashSet bclauseterms=new HashSet();
Query qc = clause.getQuery();
// Rewrite this clause e.g one* becomes (one OR onerous)
- qc = qc.rewrite(reader);
+ qc = new IndexSearcher(reader).rewrite(qc);
if (clause.getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
numNegatives++;
}