You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2021/04/29 08:48:02 UTC

[lucene] branch main updated: LUCENE-9940: DisjunctionMaxQuery shouldn't depend on disjunct order for equals checks (#110)

This is an automated email from the ASF dual-hosted git repository.

romseygeek pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new f7a3587  LUCENE-9940: DisjunctionMaxQuery shouldn't depend on disjunct order for equals checks (#110)
f7a3587 is described below

commit f7a3587091f8ce05ef08a56523571239b383b217
Author: Alan Woodward <ro...@apache.org>
AuthorDate: Thu Apr 29 09:47:55 2021 +0100

    LUCENE-9940: DisjunctionMaxQuery shouldn't depend on disjunct order for equals checks (#110)
    
    DisjunctionMaxQuery stores its disjuncts in a Query[], and uses
    Arrays.equals() for comparisons in its equals() implementation.
    This means that the order in which disjuncts are added to the query
    matters for equality checks.
    
    This commit changes DMQ to instead store its disjuncts in a Multiset,
    meaning that ordering no longer matters. The getDisjuncts()
    method now returns a Collection<Query> rather than a List, and
    some tests are changed to use query equality checks rather than
    iterating over disjuncts and expecting a particular order.
---
 lucene/CHANGES.txt                                 |  4 +++-
 .../apache/lucene/search/DisjunctionMaxQuery.java  | 24 +++++++++++-----------
 .../lucene/search/TestDisjunctionMaxQuery.java     | 20 +++++++++++++-----
 .../lucene/queryparser/xml/TestCoreParser.java     | 18 +++++++++-------
 4 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4eed756..b7f1b55 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -109,7 +109,6 @@ API Changes
   only applicable for fields that are indexed with doc values only. (Mayya Sharipova,
   Adrien Grand, Simon Willnauer)
 
-
 Improvements
 
 * LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words,
@@ -246,6 +245,9 @@ Bug fixes
 * LUCENE-9930: The Ukrainian analyzer was reloading its dictionary for every new
   TokenStreamComponents, which could lead to memory leaks. (Alan Woodward)
 
+* LUCENE-9940: The order of disjuncts in DisjunctionMaxQuery does not matter
+  for equality checks (Alan Woodward)
+
 Changes in Backwards Compatibility Policy
 
 * LUCENE-9904: regenerated UAX29URLEmailTokenizer and the corresponding analyzer with up-to-date top
diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
index 97ca0fd..3b2e1e2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxQuery.java
@@ -18,7 +18,6 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.Iterator;
@@ -45,7 +44,7 @@ import org.apache.lucene.index.LeafReaderContext;
 public final class DisjunctionMaxQuery extends Query implements Iterable<Query> {
 
   /* The subqueries */
-  private final Query[] disjuncts;
+  private final Multiset<Query> disjuncts = new Multiset<>();
 
   /* Multiple of the non-max disjunct scores added into our final score.  Non-zero values support tie-breaking. */
   private final float tieBreakerMultiplier;
@@ -66,7 +65,7 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
       throw new IllegalArgumentException("tieBreakerMultiplier must be in [0, 1]");
     }
     this.tieBreakerMultiplier = tieBreakerMultiplier;
-    this.disjuncts = disjuncts.toArray(new Query[disjuncts.size()]);
+    this.disjuncts.addAll(disjuncts);
   }
 
   /** @return An {@code Iterator<Query>} over the disjuncts */
@@ -76,8 +75,8 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
   }
 
   /** @return the disjuncts. */
-  public List<Query> getDisjuncts() {
-    return Collections.unmodifiableList(Arrays.asList(disjuncts));
+  public Collection<Query> getDisjuncts() {
+    return Collections.unmodifiableCollection(disjuncts);
   }
 
   /** @return tie breaker value for multiple matches. */
@@ -208,8 +207,8 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
    */
   @Override
   public Query rewrite(IndexReader reader) throws IOException {
-    if (disjuncts.length == 1) {
-      return disjuncts[0];
+    if (disjuncts.size() == 1) {
+      return disjuncts.iterator().next();
     }
 
     if (tieBreakerMultiplier == 1.0f) {
@@ -254,14 +253,15 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
   public String toString(String field) {
     StringBuilder buffer = new StringBuilder();
     buffer.append("(");
-    for (int i = 0; i < disjuncts.length; i++) {
-      Query subquery = disjuncts[i];
+    Iterator<Query> it = disjuncts.iterator();
+    for (int i = 0; it.hasNext(); i++) {
+      Query subquery = it.next();
       if (subquery instanceof BooleanQuery) { // wrap sub-bools in parens
         buffer.append("(");
         buffer.append(subquery.toString(field));
         buffer.append(")");
       } else buffer.append(subquery.toString(field));
-      if (i != disjuncts.length - 1) buffer.append(" | ");
+      if (i != disjuncts.size() - 1) buffer.append(" | ");
     }
     buffer.append(")");
     if (tieBreakerMultiplier != 0.0f) {
@@ -285,7 +285,7 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
 
   private boolean equalsTo(DisjunctionMaxQuery other) {
     return tieBreakerMultiplier == other.tieBreakerMultiplier
-        && Arrays.equals(disjuncts, other.disjuncts);
+        && Objects.equals(disjuncts, other.disjuncts);
   }
 
   /**
@@ -297,7 +297,7 @@ public final class DisjunctionMaxQuery extends Query implements Iterable<Query>
   public int hashCode() {
     int h = classHash();
     h = 31 * h + Float.floatToIntBits(tieBreakerMultiplier);
-    h = 31 * h + Arrays.hashCode(disjuncts);
+    h = 31 * h + Objects.hashCode(disjuncts);
     return h;
   }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
index d73f3c0..b5d0edd 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java
@@ -496,11 +496,21 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
     Query sub2 = tq("hed", "elephant");
     DisjunctionMaxQuery q = new DisjunctionMaxQuery(Arrays.asList(sub1, sub2), 1.0f);
     Query rewritten = s.rewrite(q);
-    assertTrue(rewritten instanceof BooleanQuery);
-    BooleanQuery bq = (BooleanQuery) rewritten;
-    assertEquals(bq.clauses().size(), 2);
-    assertEquals(bq.clauses().get(0), new BooleanClause(sub1, BooleanClause.Occur.SHOULD));
-    assertEquals(bq.clauses().get(1), new BooleanClause(sub2, BooleanClause.Occur.SHOULD));
+    Query expected =
+        new BooleanQuery.Builder()
+            .add(sub1, BooleanClause.Occur.SHOULD)
+            .add(sub2, BooleanClause.Occur.SHOULD)
+            .build();
+    assertEquals(expected, rewritten);
+  }
+
+  public void testDisjunctOrderAndEquals() throws Exception {
+    // the order that disjuncts are provided in should not matter for equals() comparisons
+    Query sub1 = tq("hed", "albino");
+    Query sub2 = tq("hed", "elephant");
+    Query q1 = new DisjunctionMaxQuery(Arrays.asList(sub1, sub2), 1.0f);
+    Query q2 = new DisjunctionMaxQuery(Arrays.asList(sub2, sub1), 1.0f);
+    assertEquals(q1, q2);
   }
 
   public void testRandomTopDocs() throws Exception {
diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java
index 06c05ba..ab3953f 100644
--- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java
+++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java
@@ -18,16 +18,19 @@ package org.apache.lucene.queryparser.xml;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.util.Arrays;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.DisjunctionMaxQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.util.LuceneTestCase;
@@ -99,13 +102,14 @@ public class TestCoreParser extends LuceneTestCase {
 
   public void testDisjunctionMaxQueryXML() throws ParserException, IOException {
     Query q = parse("DisjunctionMaxQuery.xml");
-    assertTrue(q instanceof DisjunctionMaxQuery);
-    DisjunctionMaxQuery d = (DisjunctionMaxQuery) q;
-    assertEquals(0.0f, d.getTieBreakerMultiplier(), 0.0001f);
-    assertEquals(2, d.getDisjuncts().size());
-    DisjunctionMaxQuery ndq = (DisjunctionMaxQuery) d.getDisjuncts().get(1);
-    assertEquals(0.3f, ndq.getTieBreakerMultiplier(), 0.0001f);
-    assertEquals(1, ndq.getDisjuncts().size());
+    Query expected =
+        new DisjunctionMaxQuery(
+            Arrays.asList(
+                new TermQuery(new Term("a", "merger")),
+                new DisjunctionMaxQuery(
+                    Arrays.asList(new TermQuery(new Term("b", "verger"))), 0.3f)),
+            0.0f);
+    assertEquals(expected, q);
   }
 
   public void testRangeQueryXML() throws ParserException, IOException {