You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2015/05/07 13:16:06 UTC
svn commit: r1678167 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/index/
lucene/core/src/java/org/apache/lucene/store/ lucene/queries/
lucene/queries/src/java/org/apache/lucene/queries/ lucene/quer...
Author: jpountz
Date: Thu May 7 11:16:06 2015
New Revision: 1678167
URL: http://svn.apache.org/r1678167
Log:
LUCENE-6350: TermsQuery is now compressed with PrefixCodedTerms.
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/store/RAMFile.java
lucene/dev/branches/branch_5x/lucene/queries/ (props changed)
lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
lucene/dev/branches/branch_5x/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1678167&r1=1678166&r2=1678167&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Thu May 7 11:16:06 2015
@@ -94,6 +94,9 @@ Optimizations
* LUCENE-6330: BooleanScorer (used for top-level disjunctions) does not decode
norms when not necessary anymore. (Adrien Grand)
+* LUCENE-6350: TermsQuery is now compressed with PrefixCodedTerms.
+ (Robert Muir, Mike McCandless, Adrien Grand)
+
Bug Fixes
* LUCENE-6378: Fix all RuntimeExceptions to throw the underlying root cause.
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java?rev=1678167&r1=1678166&r2=1678167&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java Thu May 7 11:16:06 2015
@@ -19,8 +19,6 @@ package org.apache.lucene.index;
import org.apache.lucene.util.BytesRefIterator;
-// TODO: maybe TermsFilter could use this?
-
/** Iterates over terms across multiple fields. The caller must
* check {@link #field} after each {@link #next} to see if the field
* changed, but {@code ==} can be used since the iterator
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java?rev=1678167&r1=1678166&r2=1678167&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java Thu May 7 11:16:06 2015
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
+import java.util.Objects;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RAMFile;
@@ -31,14 +32,14 @@ import org.apache.lucene.util.BytesRefBu
/**
* Prefix codes term instances (prefixes are shared)
- * @lucene.experimental
+ * @lucene.internal
*/
-class PrefixCodedTerms implements Accountable {
+public class PrefixCodedTerms implements Accountable {
final RAMFile buffer;
private long delGen;
private PrefixCodedTerms(RAMFile buffer) {
- this.buffer = buffer;
+ this.buffer = Objects.requireNonNull(buffer);
}
@Override
@@ -63,6 +64,9 @@ class PrefixCodedTerms implements Accoun
private Term lastTerm = new Term("");
private BytesRefBuilder lastTermBytes = new BytesRefBuilder();
+ /** Sole constructor. */
+ public Builder() {}
+
/** add a term */
public void add(Term term) {
assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;
@@ -111,6 +115,7 @@ class PrefixCodedTerms implements Accoun
}
}
+ /** An iterator over the list of terms stored in a {@link PrefixCodedTerms}. */
public static class TermIterator extends FieldTermIterator {
final IndexInput input;
final BytesRefBuilder builder = new BytesRefBuilder();
@@ -119,7 +124,7 @@ class PrefixCodedTerms implements Accoun
final long delGen;
String field = "";
- public TermIterator(long delGen, RAMFile buffer) {
+ private TermIterator(long delGen, RAMFile buffer) {
try {
input = new RAMInputStream("MergedPrefixCodedTermsIterator", buffer);
} catch (IOException e) {
@@ -169,7 +174,24 @@ class PrefixCodedTerms implements Accoun
}
}
+ /** Return an iterator over the terms stored in this {@link PrefixCodedTerms}. */
public TermIterator iterator() {
return new TermIterator(delGen, buffer);
}
+
+ @Override
+ public int hashCode() {
+ int h = buffer.hashCode();
+ h = 31 * h + (int) (delGen ^ (delGen >>> 32));
+ return h;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) return true;
+ if (obj == null) return false;
+ if (getClass() != obj.getClass()) return false;
+ PrefixCodedTerms other = (PrefixCodedTerms) obj;
+ return buffer.equals(other.buffer) && delGen == other.delGen;
+ }
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/store/RAMFile.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/store/RAMFile.java?rev=1678167&r1=1678166&r2=1678167&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/store/RAMFile.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/store/RAMFile.java Thu May 7 11:16:06 2015
@@ -20,6 +20,7 @@ package org.apache.lucene.store;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.Arrays;
import org.apache.lucene.util.Accountable;
@@ -27,7 +28,7 @@ import org.apache.lucene.util.Accountabl
* Represents a file in RAM as a list of byte[] buffers.
* @lucene.internal */
public class RAMFile implements Accountable {
- protected ArrayList<byte[]> buffers = new ArrayList<>();
+ protected final ArrayList<byte[]> buffers = new ArrayList<>();
long length;
RAMDirectory directory;
protected long sizeInBytes;
@@ -93,4 +94,31 @@ public class RAMFile implements Accounta
public String toString() {
return getClass().getSimpleName() + "(length=" + length + ")";
}
+
+ @Override
+ public int hashCode() {
+ int h = (int) (length ^ (length >>> 32));
+ for (byte[] block : buffers) {
+ h = 31 * h + Arrays.hashCode(block);
+ }
+ return h;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) return true;
+ if (obj == null) return false;
+ if (getClass() != obj.getClass()) return false;
+ RAMFile other = (RAMFile) obj;
+ if (length != other.length) return false;
+ if (buffers.size() != other.buffers.size()) {
+ return false;
+ }
+ for (int i = 0; i < buffers.size(); i++) {
+ if (!Arrays.equals(buffers.get(i), other.buffers.get(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
}
Modified: lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java?rev=1678167&r1=1678166&r2=1678167&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java (original)
+++ lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java Thu May 7 11:16:06 2015
@@ -18,11 +18,9 @@ package org.apache.lucene.queries;
*/
import java.io.IOException;
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
-import java.util.Iterator;
import java.util.List;
import java.util.Set;
@@ -30,13 +28,15 @@ import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.PrefixCodedTerms;
+import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
@@ -73,38 +73,35 @@ public class TermsQuery extends Query im
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(TermsQuery.class);
- /*
- * this class is often used for large number of terms in a single field.
- * to optimize for this case and to be filter-cache friendly we
- * serialize all terms into a single byte array and store offsets
- * in a parallel array to keep the # of object constant and speed up
- * equals / hashcode.
- *
- * This adds quite a bit of complexity but allows large term queries to
- * be efficient for GC and cache-lookups
- */
- private final int[] offsets;
- private final byte[] termsBytes;
- private final TermsAndField[] termsAndFields;
- private final int hashCode; // cached hashcode for fast cache lookups, not including the boost
+ private final PrefixCodedTerms termData;
+ private final int termDataHashCode; // cached hashcode of termData
+
+ private static Term[] toTermArray(String field, List<BytesRef> termBytes) {
+ Term[] array = new Term[termBytes.size()];
+ int i = 0;
+ for (BytesRef t : termBytes) {
+ array[i++] = new Term(field, t);
+ }
+ return array;
+ }
/**
* Creates a new {@link TermsQuery} from the given list. The list
* can contain duplicate terms and multiple fields.
*/
public TermsQuery(final List<Term> terms) {
- this(new FieldAndTermEnum() {
- // we need to sort for deduplication and to have a common cache key
- final Iterator<Term> iter = sort(terms).iterator();
- @Override
- public BytesRef next() {
- if (iter.hasNext()) {
- Term next = iter.next();
- field = next.field();
- return next.bytes();
- }
- return null;
- }}, terms.size());
+ Term[] sortedTerms = terms.toArray(new Term[terms.size()]);
+ ArrayUtil.timSort(sortedTerms);
+ PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
+ Term previous = null;
+ for (Term term : sortedTerms) {
+ if (term.equals(previous) == false) {
+ builder.add(term);
+ }
+ previous = term;
+ }
+ termData = builder.finish();
+ termDataHashCode = termData.hashCode();
}
/**
@@ -112,17 +109,7 @@ public class TermsQuery extends Query im
* a single field.
*/
public TermsQuery(final String field, final List<BytesRef> terms) {
- this(new FieldAndTermEnum(field) {
- // we need to sort for deduplication and to have a common cache key
- final Iterator<BytesRef> iter = sort(terms).iterator();
- @Override
- public BytesRef next() {
- if (iter.hasNext()) {
- return iter.next();
- }
- return null;
- }
- }, terms.size());
+ this(toTermArray(field, terms));
}
/**
@@ -142,106 +129,37 @@ public class TermsQuery extends Query im
this(Arrays.asList(terms));
}
- private TermsQuery(FieldAndTermEnum iter, int length) {
- // TODO: maybe use oal.index.PrefixCodedTerms instead?
- // If number of terms is more than a few hundred it
- // should be a win
-
- // TODO: we also pack terms in FieldCache/DocValues
- // ... maybe we can refactor to share that code
-
- // TODO: yet another option is to build the union of the terms in
- // an automaton an call intersect on the termsenum if the density is high
-
- int hash = 9;
- byte[] serializedTerms = new byte[0];
- this.offsets = new int[length+1];
- int lastEndOffset = 0;
- int index = 0;
- ArrayList<TermsAndField> termsAndFields = new ArrayList<>();
- TermsAndField lastTermsAndField = null;
- BytesRef previousTerm = null;
- String previousField = null;
- BytesRef currentTerm;
- String currentField;
- while((currentTerm = iter.next()) != null) {
- currentField = iter.field();
- if (currentField == null) {
- throw new IllegalArgumentException("Field must not be null");
- }
- if (previousField != null) {
- // deduplicate
- if (previousField.equals(currentField)) {
- if (previousTerm.bytesEquals(currentTerm)){
- continue;
- }
- } else {
- final int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
- lastTermsAndField = new TermsAndField(start, index, previousField);
- termsAndFields.add(lastTermsAndField);
- }
- }
- hash = 31 * hash + currentField.hashCode();
- hash = 31 * hash + currentTerm.hashCode();
- if (serializedTerms.length < lastEndOffset+currentTerm.length) {
- serializedTerms = ArrayUtil.grow(serializedTerms, lastEndOffset+currentTerm.length);
- }
- System.arraycopy(currentTerm.bytes, currentTerm.offset, serializedTerms, lastEndOffset, currentTerm.length);
- offsets[index] = lastEndOffset;
- lastEndOffset += currentTerm.length;
- index++;
- previousTerm = currentTerm;
- previousField = currentField;
- }
- offsets[index] = lastEndOffset;
- final int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
- lastTermsAndField = new TermsAndField(start, index, previousField);
- termsAndFields.add(lastTermsAndField);
- this.termsBytes = ArrayUtil.shrink(serializedTerms, lastEndOffset);
- this.termsAndFields = termsAndFields.toArray(new TermsAndField[termsAndFields.size()]);
- this.hashCode = hash;
- }
-
@Override
public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
if (!super.equals(obj)) {
return false;
}
TermsQuery that = (TermsQuery) obj;
- // first check the fields before even comparing the bytes
- if (that.hashCode == hashCode && getBoost() == that.getBoost() && Arrays.equals(termsAndFields, that.termsAndFields)) {
- int lastOffset = termsAndFields[termsAndFields.length - 1].end;
- // compare offsets since we sort they must be identical
- if (ArrayUtil.equals(offsets, 0, that.offsets, 0, lastOffset + 1)) {
- // straight byte comparison since we sort they must be identical
- return ArrayUtil.equals(termsBytes, 0, that.termsBytes, 0, offsets[lastOffset]);
- }
- }
- return false;
+ // termData might be heavy to compare so check the hash code first
+ return termDataHashCode == that.termDataHashCode
+ && termData.equals(that.termData);
}
@Override
public int hashCode() {
- return super.hashCode() ^ this.hashCode;
+ return 31 * super.hashCode() + termDataHashCode;
}
@Override
public String toString(String defaultField) {
StringBuilder builder = new StringBuilder();
- BytesRef spare = new BytesRef(termsBytes);
boolean first = true;
- for (int i = 0; i < termsAndFields.length; i++) {
- TermsAndField current = termsAndFields[i];
- for (int j = current.start; j < current.end; j++) {
- spare.offset = offsets[j];
- spare.length = offsets[j+1] - offsets[j];
- if (!first) {
- builder.append(' ');
- }
- first = false;
- builder.append(current.field).append(':');
- builder.append(spare.utf8ToString());
- }
+ TermIterator iterator = termData.iterator();
+ for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
+ if (!first) {
+ builder.append(' ');
+ }
+ first = false;
+ builder.append(iterator.field()).append(':');
+ builder.append(term.utf8ToString());
}
builder.append(ToStringUtils.boost(getBoost()));
@@ -250,10 +168,7 @@ public class TermsQuery extends Query im
@Override
public long ramBytesUsed() {
- return BASE_RAM_BYTES_USED
- + RamUsageEstimator.sizeOf(termsAndFields)
- + RamUsageEstimator.sizeOf(termsBytes)
- + RamUsageEstimator.sizeOf(offsets);
+ return BASE_RAM_BYTES_USED + termData.ramBytesUsed();
}
@Override
@@ -261,95 +176,9 @@ public class TermsQuery extends Query im
return Collections.emptyList();
}
- private static final class TermsAndField implements Accountable {
-
- private static final long BASE_RAM_BYTES_USED =
- RamUsageEstimator.shallowSizeOfInstance(TermsAndField.class)
- + RamUsageEstimator.shallowSizeOfInstance(String.class)
- + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // header of the array held by the String
-
- final int start;
- final int end;
- final String field;
-
-
- TermsAndField(int start, int end, String field) {
- super();
- this.start = start;
- this.end = end;
- this.field = field;
- }
-
- @Override
- public long ramBytesUsed() {
- // this is an approximation since we don't actually know how strings store
- // their data, which can be JVM-dependent
- return BASE_RAM_BYTES_USED + field.length() * RamUsageEstimator.NUM_BYTES_CHAR;
- }
-
- @Override
- public Collection<Accountable> getChildResources() {
- return Collections.emptyList();
- }
-
- @Override
- public int hashCode() {
- final int prime = 31;
- int result = 1;
- result = prime * result + ((field == null) ? 0 : field.hashCode());
- result = prime * result + end;
- result = prime * result + start;
- return result;
- }
-
- @Override
- public boolean equals(Object obj) {
- if (this == obj) return true;
- if (obj == null) return false;
- if (getClass() != obj.getClass()) return false;
- TermsAndField other = (TermsAndField) obj;
- if (field == null) {
- if (other.field != null) return false;
- } else if (!field.equals(other.field)) return false;
- if (end != other.end) return false;
- if (start != other.start) return false;
- return true;
- }
-
- }
-
- private static abstract class FieldAndTermEnum {
- protected String field;
-
- public abstract BytesRef next();
-
- public FieldAndTermEnum() {}
-
- public FieldAndTermEnum(String field) { this.field = field; }
-
- public String field() {
- return field;
- }
- }
-
- /*
- * simple utility that returns the in-place sorted list
- */
- private static <T extends Comparable<? super T>> List<T> sort(List<T> toSort) {
- if (toSort.isEmpty()) {
- throw new IllegalArgumentException("no terms provided");
- }
- Collections.sort(toSort);
- return toSort;
- }
-
@Override
- public Weight createWeight(IndexSearcher searcher, boolean needsScores)
- throws IOException {
- return new Weight(this) {
-
- private float queryNorm;
- private float queryWeight;
+ public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
+ return new ConstantScoreWeight(this) {
@Override
public void extractTerms(Set<Term> terms) {
@@ -360,51 +189,30 @@ public class TermsQuery extends Query im
}
@Override
- public float getValueForNormalization() throws IOException {
- queryWeight = getBoost();
- return queryWeight * queryWeight;
- }
-
- @Override
- public void normalize(float norm, float topLevelBoost) {
- queryNorm = norm * topLevelBoost;
- queryWeight *= queryNorm;
- }
-
- @Override
- public Explanation explain(LeafReaderContext context, int doc) throws IOException {
- final Scorer s = scorer(context, context.reader().getLiveDocs());
- final boolean exists = (s != null && s.advance(doc) == doc);
-
- if (exists) {
- return Explanation.match(queryWeight, TermsQuery.this.toString() + ", product of:",
- Explanation.match(getBoost(), "boost"), Explanation.match(queryNorm, "queryNorm"));
- } else {
- return Explanation.noMatch(TermsQuery.this.toString() + " doesn't match id " + doc);
- }
- }
-
- @Override
- public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(LeafReaderContext context, Bits acceptDocs, final float score) throws IOException {
final LeafReader reader = context.reader();
BitDocIdSet.Builder builder = new BitDocIdSet.Builder(reader.maxDoc());
final Fields fields = reader.fields();
- final BytesRef spare = new BytesRef(termsBytes);
+ String lastField = null;
Terms terms = null;
TermsEnum termsEnum = null;
PostingsEnum docs = null;
- for (TermsAndField termsAndField : termsAndFields) {
- if ((terms = fields.terms(termsAndField.field)) != null) {
- termsEnum = terms.iterator(); // this won't return null
- for (int i = termsAndField.start; i < termsAndField.end; i++) {
- spare.offset = offsets[i];
- spare.length = offsets[i+1] - offsets[i];
- if (termsEnum.seekExact(spare)) {
- docs = termsEnum.postings(acceptDocs, docs, PostingsEnum.NONE); // no freq since we don't need them
- builder.or(docs);
- }
+ TermIterator iterator = termData.iterator();
+ for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
+ String field = iterator.field();
+ // comparing references is fine here
+ if (field != lastField) {
+ terms = fields.terms(field);
+ if (terms == null) {
+ termsEnum = null;
+ } else {
+ termsEnum = terms.iterator();
}
}
+ if (termsEnum != null && termsEnum.seekExact(term)) {
+ docs = termsEnum.postings(acceptDocs, docs, PostingsEnum.NONE);
+ builder.or(docs);
+ }
}
BitDocIdSet result = builder.build();
if (result == null) {
@@ -412,11 +220,15 @@ public class TermsQuery extends Query im
}
final DocIdSetIterator disi = result.iterator();
+ if (disi == null) {
+ return null;
+ }
+
return new Scorer(this) {
@Override
public float score() throws IOException {
- return queryWeight;
+ return score;
}
@Override
Modified: lucene/dev/branches/branch_5x/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java?rev=1678167&r1=1678166&r2=1678167&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java Thu May 7 11:16:06 2015
@@ -37,6 +37,7 @@ import org.apache.lucene.search.BooleanQ
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
@@ -185,30 +186,6 @@ public class TermsQueryTest extends Luce
assertFalse(left.equals(right));
}
- public void testNoTerms() {
- List<Term> emptyTerms = Collections.emptyList();
- List<BytesRef> emptyBytesRef = Collections.emptyList();
- try {
- new TermsQuery(emptyTerms);
- fail("must fail - no terms!");
- } catch (IllegalArgumentException e) {}
-
- try {
- new TermsQuery(emptyTerms.toArray(new Term[0]));
- fail("must fail - no terms!");
- } catch (IllegalArgumentException e) {}
-
- try {
- new TermsQuery(null, emptyBytesRef.toArray(new BytesRef[0]));
- fail("must fail - no terms!");
- } catch (IllegalArgumentException e) {}
-
- try {
- new TermsQuery(null, emptyBytesRef);
- fail("must fail - no terms!");
- } catch (IllegalArgumentException e) {}
- }
-
public void testToString() {
TermsQuery termsQuery = new TermsQuery(new Term("field1", "a"),
new Term("field1", "b"),
@@ -216,6 +193,24 @@ public class TermsQueryTest extends Luce
assertEquals("field1:a field1:b field1:c", termsQuery.toString());
}
+ public void testDedup() {
+ Query query1 = new TermsQuery(new Term("foo", "bar"));
+ Query query2 = new TermsQuery(new Term("foo", "bar"), new Term("foo", "bar"));
+ QueryUtils.checkEqual(query1, query2);
+ }
+
+ public void testOrderDoesNotMatter() {
+ // order of terms is different
+ Query query1 = new TermsQuery(new Term("foo", "bar"), new Term("foo", "baz"));
+ Query query2 = new TermsQuery(new Term("foo", "baz"), new Term("foo", "bar"));
+ QueryUtils.checkEqual(query1, query2);
+
+ // order of fields is different
+ query1 = new TermsQuery(new Term("foo", "bar"), new Term("bar", "bar"));
+ query2 = new TermsQuery(new Term("bar", "bar"), new Term("foo", "bar"));
+ QueryUtils.checkEqual(query1, query2);
+ }
+
public void testRamBytesUsed() {
List<Term> terms = new ArrayList<>();
final int numTerms = 1000 + random().nextInt(1000);
@@ -225,8 +220,7 @@ public class TermsQueryTest extends Luce
TermsQuery query = new TermsQuery(terms);
final long actualRamBytesUsed = RamUsageTester.sizeOf(query);
final long expectedRamBytesUsed = query.ramBytesUsed();
- // error margin within 1%
- assertEquals(actualRamBytesUsed, expectedRamBytesUsed, actualRamBytesUsed / 100);
+ // error margin within 5%
+ assertEquals(actualRamBytesUsed, expectedRamBytesUsed, actualRamBytesUsed / 20);
}
-
}