You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cp...@apache.org on 2019/10/23 11:52:45 UTC

[lucene-solr] branch branch_8x updated (bb3bcdd -> fb37f89)

This is an automated email from the ASF dual-hosted git repository.

cpoerschke pushed a change to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git.


    from bb3bcdd  LUCENE-9006: WDGF catenateAll should come before parts Fixes #953
     new 516f607  LUCENE-9010: extend TopGroups.merge test coverage
     new fb37f89  LUCENE-8996: maxScore was sometimes missing from distributed grouped responses. (Julien Massenet, Diego Ceccarelli, Christine Poerschke)

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 lucene/CHANGES.txt                                 |   3 +
 .../apache/lucene/search/grouping/TopGroups.java   |  17 +-
 .../lucene/search/grouping/TopGroupsTest.java      | 231 +++++++++++++++++++++
 3 files changed, 249 insertions(+), 2 deletions(-)
 create mode 100644 lucene/grouping/src/test/org/apache/lucene/search/grouping/TopGroupsTest.java


[lucene-solr] 02/02: LUCENE-8996: maxScore was sometimes missing from distributed grouped responses. (Julien Massenet, Diego Ceccarelli, Christine Poerschke)

Posted by cp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

cpoerschke pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit fb37f89af27d881efb0f7b40eff2dc76ef5bc2c4
Author: Christine Poerschke <cp...@apache.org>
AuthorDate: Wed Oct 23 12:08:42 2019 +0100

    LUCENE-8996: maxScore was sometimes missing from distributed grouped responses.
    (Julien Massenet, Diego Ceccarelli, Christine Poerschke)
    
    Resolved Conflicts:
    	lucene/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java
---
 lucene/CHANGES.txt                                      |  3 +++
 .../org/apache/lucene/search/grouping/TopGroups.java    | 17 +++++++++++++++--
 .../apache/lucene/search/grouping/TopGroupsTest.java    |  4 ----
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1d9d4b9..1fec234 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -170,6 +170,9 @@ Bug Fixes
   subclauses, and then pull SHOULD, MUST_NOT and FILTER visitors from it rather than from
   the parent.  (Alan Woodward)
 
+* LUCENE-8996: maxScore was sometimes missing from distributed grouped responses.
+  (Julien Massenet, Diego Ceccarelli, Christine Poerschke)
+
 Other
 
 * LUCENE-8778 LUCENE-8911 LUCENE-8957: Define analyzer SPI names as static final fields and document the names in Javadocs.
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java
index 71338f9..af1fffd 100644
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java
@@ -80,6 +80,19 @@ public class TopGroups<T> {
     Avg,
   }
 
+  /**
+   * If either value is NaN then return the other value, otherwise
+   * return the greater of the two values by calling Math.max.
+   * @param a - one value
+   * @param b - another value
+   * @return ignoring any NaN return the greater of a and b
+   */
+  private static float nonNANmax(float a, float b) {
+    if (Float.isNaN(a)) return b;
+    if (Float.isNaN(b)) return a;
+    return Math.max(a, b);
+  }
+
   /** Merges an array of TopGroups, for example obtained
    *  from the second-pass collector across multiple
    *  shards.  Each TopGroups must have been sorted by the
@@ -169,7 +182,7 @@ public class TopGroups<T> {
               shardGroupDocs.scoreDocs,
               docSort.getSort());
         }
-        maxScore = Math.max(maxScore, shardGroupDocs.maxScore);
+        maxScore =  nonNANmax(maxScore, shardGroupDocs.maxScore);
         assert shardGroupDocs.totalHits.relation == Relation.EQUAL_TO;
         totalHits += shardGroupDocs.totalHits.value;
         scoreSum += shardGroupDocs.score;
@@ -223,7 +236,7 @@ public class TopGroups<T> {
                                                    mergedScoreDocs,
                                                    groupValue,
                                                    shardGroups[0].groups[groupIDX].groupSortValues);
-      totalMaxScore = Math.max(totalMaxScore, maxScore);
+      totalMaxScore = nonNANmax(totalMaxScore, maxScore);
     }
 
     if (totalGroupCount != null) {
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TopGroupsTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TopGroupsTest.java
index 8fb661d..e160bf4 100644
--- a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TopGroupsTest.java
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TopGroupsTest.java
@@ -21,16 +21,12 @@ import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TotalHits;
 import org.apache.lucene.util.LuceneTestCase;
 
-import org.junit.Ignore;
-
 public class TopGroupsTest extends LuceneTestCase {
 
-  @Ignore // https://issues.apache.org/jira/browse/LUCENE-8996
   public void testAllGroupsEmptyInSecondPass() {
     narrativeMergeTestImplementation(false, false, false, false);
   }
 
-  @Ignore // https://issues.apache.org/jira/browse/LUCENE-8996
   public void testSomeGroupsEmptyInSecondPass() {
     narrativeMergeTestImplementation(false, false, false, true);
     narrativeMergeTestImplementation(false, false, true, false);


[lucene-solr] 01/02: LUCENE-9010: extend TopGroups.merge test coverage

Posted by cp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

cpoerschke pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 516f607618a501596272bbfdcb533277072e485e
Author: Christine Poerschke <cp...@apache.org>
AuthorDate: Tue Oct 22 15:50:48 2019 +0100

    LUCENE-9010: extend TopGroups.merge test coverage
---
 .../lucene/search/grouping/TopGroupsTest.java      | 235 +++++++++++++++++++++
 1 file changed, 235 insertions(+)

diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TopGroupsTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TopGroupsTest.java
new file mode 100644
index 0000000..8fb661d
--- /dev/null
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TopGroupsTest.java
@@ -0,0 +1,235 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.grouping;
+
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TotalHits;
+import org.apache.lucene.util.LuceneTestCase;
+
+import org.junit.Ignore;
+
+public class TopGroupsTest extends LuceneTestCase {
+
+  @Ignore // https://issues.apache.org/jira/browse/LUCENE-8996
+  public void testAllGroupsEmptyInSecondPass() {
+    narrativeMergeTestImplementation(false, false, false, false);
+  }
+
+  @Ignore // https://issues.apache.org/jira/browse/LUCENE-8996
+  public void testSomeGroupsEmptyInSecondPass() {
+    narrativeMergeTestImplementation(false, false, false, true);
+    narrativeMergeTestImplementation(false, false, true, false);
+    narrativeMergeTestImplementation(false, false, true, true);
+
+    narrativeMergeTestImplementation(false, true, false, false);
+    narrativeMergeTestImplementation(false, true, false, true);
+    narrativeMergeTestImplementation(false, true, true, false);
+    narrativeMergeTestImplementation(false, true, true, true);
+
+    narrativeMergeTestImplementation(true, false, false, false);
+    narrativeMergeTestImplementation(true, false, false, true);
+    narrativeMergeTestImplementation(true, false, true, false);
+    narrativeMergeTestImplementation(true, false, true, true);
+
+    narrativeMergeTestImplementation(true, true, false, false);
+    narrativeMergeTestImplementation(true, true, false, true);
+    narrativeMergeTestImplementation(true, true, true, false);
+  }
+
+  public void testNoGroupsEmptyInSecondPass() {
+    narrativeMergeTestImplementation(true, true, true, true);
+  }
+
+  /*
+   * This method implements tests for the <code>TopGroup.merge</code> method
+   * using a narrative approach. Use of a creative narrative may seem unusual
+   * or even silly but the idea behind it is to make it hopefully easier to
+   * reason about the documents and groups and scores in the test whilst testing
+   * several scenario permutations.
+   *
+   * Imagine:
+   *
+   * Each document represents (say) a picture book of an animal.
+   * We are searching for two books and wish to draw a picture of our own, inspired by the books.
+   * We think that large animals are easier to draw and therefore order the books by the featured animal's size.
+   * We think that different colors would make for a good drawing and therefore group the books by the featured animal's color.
+   *
+   * Index content:
+   *
+   * The documents are in 2 groups ("blue" and "red") and there are 4 documents across 2 shards:
+   * shard 1 (blue whale, red ant) and shard 2 (blue dragonfly, red squirrel).
+   *
+   * If all documents are present the "blue whale" and the "red squirrel" documents would be returned
+   * for our drawing since they are the largest animals in their respective groups.
+   *
+   * Test permutations (haveBlueWhale, haveRedAnt, haveBlueDragonfly, haveRedSquirrel) arise because
+   * in the first pass of the search all documents can be present, but
+   * in the second pass of the search some documents could be missing
+   * if they have been deleted 'just so' between the two phases.
+   *
+   * Additionally a <code>haveAnimal == false</code> condition also represents scenarios where a given
+   * group has documents on some but not all shards in the collection.
+   */
+  private void narrativeMergeTestImplementation(
+      boolean haveBlueWhale,
+      boolean haveRedAnt,
+      boolean haveBlueDragonfly,
+      boolean haveRedSquirrel) {
+
+    final String blueGroupValue = "blue";
+    final String redGroupValue = "red";
+
+    final Integer redAntSize = 1;
+    final Integer blueDragonflySize = 10;
+    final Integer redSquirrelSize = 100;
+    final Integer blueWhaleSize = 1000;
+
+    final float redAntScore = redAntSize;
+    final float blueDragonflyScore = blueDragonflySize;
+    final float redSquirrelScore = redSquirrelSize;
+    final float blueWhaleScore = blueWhaleSize;
+
+    final Sort sort = Sort.RELEVANCE;
+
+    final TopGroups<String> shard1TopGroups;
+    {
+      final GroupDocs<String> group1 = haveBlueWhale
+          ? createSingletonGroupDocs(blueGroupValue, new Object[] { blueWhaleSize }, 1 /* docId */, blueWhaleScore, 0 /* shardIndex */)
+              : createEmptyGroupDocs(blueGroupValue, new Object[] { blueWhaleSize });
+
+      final GroupDocs<String> group2 = haveRedAnt
+          ? createSingletonGroupDocs(redGroupValue, new Object[] { redAntSize }, 2 /* docId */, redAntScore, 0 /* shardIndex */)
+              : createEmptyGroupDocs(redGroupValue, new Object[] { redAntSize });
+
+      shard1TopGroups = new TopGroups<String>(
+          sort.getSort() /* groupSort */,
+          sort.getSort() /* withinGroupSort */,
+          group1.scoreDocs.length + group2.scoreDocs.length /* totalHitCount */,
+          group1.scoreDocs.length + group2.scoreDocs.length /* totalGroupedHitCount */,
+          combineGroupDocs(group1, group2) /* groups */,
+          (haveBlueWhale ? blueWhaleScore : (haveRedAnt ? redAntScore : Float.NaN)) /* maxScore */);
+    }
+
+    final TopGroups<String> shard2TopGroups;
+    {
+      final GroupDocs<String> group1 = haveBlueDragonfly
+          ? createSingletonGroupDocs(blueGroupValue, new Object[] { blueDragonflySize }, 3 /* docId */, blueDragonflyScore, 1 /* shardIndex */)
+              : createEmptyGroupDocs(blueGroupValue, new Object[] { blueDragonflySize });
+
+      final GroupDocs<String> group2 = haveRedSquirrel
+      ? createSingletonGroupDocs(redGroupValue, new Object[] { redSquirrelSize }, 4 /* docId */, redSquirrelScore, 1 /* shardIndex */)
+          : createEmptyGroupDocs(redGroupValue, new Object[] { redSquirrelSize });
+
+      shard2TopGroups = new TopGroups<String>(
+          sort.getSort() /* groupSort */,
+          sort.getSort() /* withinGroupSort */,
+          group1.scoreDocs.length + group2.scoreDocs.length /* totalHitCount */,
+          group1.scoreDocs.length + group2.scoreDocs.length /* totalGroupedHitCount */,
+          combineGroupDocs(group1, group2) /* groups */,
+          (haveRedSquirrel ? redSquirrelScore : (haveBlueDragonfly ? blueDragonflyScore : Float.NaN)) /* maxScore */);
+    }
+
+    final TopGroups<String> mergedTopGroups = TopGroups.<String>merge(
+        combineTopGroups(shard1TopGroups, shard2TopGroups),
+        sort /* groupSort */,
+        sort /* docSort */,
+        0 /* docOffset */,
+        2 /* docTopN */,
+        TopGroups.ScoreMergeMode.None);
+    assertNotNull(mergedTopGroups);
+
+    final int expectedCount =
+        (haveBlueWhale     ? 1 : 0) +
+        (haveRedAnt        ? 1 : 0) +
+        (haveBlueDragonfly ? 1 : 0) +
+        (haveRedSquirrel   ? 1 : 0);
+
+    assertEquals(expectedCount, mergedTopGroups.totalHitCount);
+    assertEquals(expectedCount, mergedTopGroups.totalGroupedHitCount);
+
+    assertEquals(2, mergedTopGroups.groups.length);
+    {
+      assertEquals(blueGroupValue, mergedTopGroups.groups[0].groupValue);
+      final float expectedBlueMaxScore =
+          (haveBlueWhale ? blueWhaleScore : (haveBlueDragonfly ? blueDragonflyScore : Float.MIN_VALUE));
+      checkMaxScore(expectedBlueMaxScore, mergedTopGroups.groups[0].maxScore);
+    }
+    {
+      assertEquals(redGroupValue, mergedTopGroups.groups[1].groupValue);
+      final float expectedRedMaxScore =
+          (haveRedSquirrel ? redSquirrelScore : (haveRedAnt ? redAntScore : Float.MIN_VALUE));
+      checkMaxScore(expectedRedMaxScore, mergedTopGroups.groups[1].maxScore);
+    }
+
+    final float expectedMaxScore =
+        (haveBlueWhale ? blueWhaleScore
+            : (haveRedSquirrel ? redSquirrelScore
+                : (haveBlueDragonfly ? blueDragonflyScore
+                    : (haveRedAnt ? redAntScore
+                        : Float.MIN_VALUE))));
+    checkMaxScore(expectedMaxScore, mergedTopGroups.maxScore);
+  }
+
+  private static void checkMaxScore(float expected, float actual) {
+    if (Float.isNaN(expected)) {
+      assertTrue(Float.isNaN(actual));
+    } else {
+      assertEquals(expected, actual, 0.0);
+    }
+  }
+
+  // helper methods
+
+  private static GroupDocs<String> createEmptyGroupDocs(String groupValue, Object[] groupSortValues) {
+    return new  GroupDocs<String>(
+        Float.NaN /* score */,
+        Float.NaN /* maxScore */,
+        new TotalHits(0, TotalHits.Relation.EQUAL_TO),
+        new ScoreDoc[0],
+        groupValue,
+        groupSortValues);
+    }
+
+  private static GroupDocs<String> createSingletonGroupDocs(String groupValue, Object[] groupSortValues,
+      int docId, float docScore, int shardIndex) {
+    return new  GroupDocs<String>(
+        Float.NaN /* score */,
+        docScore /* maxScore */,
+        new TotalHits(1, TotalHits.Relation.EQUAL_TO),
+        new ScoreDoc[] { new ScoreDoc(docId, docScore, shardIndex) },
+        groupValue,
+        groupSortValues);
+    }
+
+  private static GroupDocs<String>[] combineGroupDocs(GroupDocs<String> group0, GroupDocs<String> group1) {
+    @SuppressWarnings({"unchecked","rawtypes"})
+    final GroupDocs<String>[] groups = new GroupDocs[2];
+    groups[0] = group0;
+    groups[1] = group1;
+    return groups;
+  }
+
+  private static TopGroups<String>[] combineTopGroups(TopGroups<String> group0, TopGroups<String> group1) {
+    @SuppressWarnings({"unchecked","rawtypes"})
+    final TopGroups<String>[] groups = new TopGroups[2];
+    groups[0] = group0;
+    groups[1] = group1;
+    return groups;
+  }
+
+}