You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2010/11/21 22:21:28 UTC

svn commit: r1037563 - in /lucene/dev/trunk/solr/src: java/org/apache/solr/search/Grouping.java test/org/apache/solr/TestGroupingSearch.java

Author: yonik
Date: Sun Nov 21 21:21:28 2010
New Revision: 1037563

URL: http://svn.apache.org/viewvc?rev=1037563&view=rev
Log:
SOLR-236: change definition of how groups are sorted to fix algorithmic error when sort!=group.sort

Modified:
    lucene/dev/trunk/solr/src/java/org/apache/solr/search/Grouping.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/TestGroupingSearch.java

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/Grouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/Grouping.java?rev=1037563&r1=1037562&r2=1037563&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/Grouping.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/Grouping.java Sun Nov 21 21:21:28 2010
@@ -149,11 +149,15 @@ public class Grouping {
       // if we aren't going to return any groups, disregard the offset 
       if (numGroups == 0) maxGroupToFind = 0;
 
+      collector = new TopGroupCollector(groupBy, context, normalizeSort(sort), maxGroupToFind);
+
+      /*** if we need a different algorithm when sort != group.sort
       if (compareSorts(sort, groupSort)) {
         collector = new TopGroupCollector(groupBy, context, normalizeSort(sort), maxGroupToFind);
       } else {
         collector = new TopGroupSortCollector(groupBy, context, normalizeSort(sort), normalizeSort(groupSort), maxGroupToFind);
       }
+      ***/
       return collector;
     }
 
@@ -337,10 +341,6 @@ class SearchGroup {
   // float topDocScore;  // currently unused
   int comparatorSlot;
 
-  // currently only used when sort != sort.group
-  FieldComparator[] sortGroupComparators;
-  int[] sortGroupReversed;
-
   /***
   @Override
   public int hashCode() {
@@ -627,174 +627,6 @@ class TopGroupCollector extends GroupCol
 }
 
 
-/**
- * This class allows a different sort within a group than what is used between groups.
- * Sorting between groups is done by the sort value of the first (highest ranking)
- * document in that group.
- */
-class TopGroupSortCollector extends TopGroupCollector {
-
-  IndexReader reader;
-  Sort groupSort;
-
-  public TopGroupSortCollector(ValueSource groupByVS, Map vsContext, Sort sort, Sort groupSort, int nGroups) throws IOException {
-    super(groupByVS, vsContext, sort, nGroups);
-    this.groupSort = groupSort;
-  }
-
-  void constructComparators(FieldComparator[] comparators, int[] reversed, SortField[] sortFields, int size) throws IOException {
-    for (int i = 0; i < sortFields.length; i++) {
-      SortField sortField = sortFields[i];
-      reversed[i] = sortField.getReverse() ? -1 : 1;
-      comparators[i] = sortField.getComparator(size, i);
-      if (scorer != null) comparators[i].setScorer(scorer);
-      if (reader != null) comparators[i] = comparators[i].setNextReader(reader, docBase);
-    }
-  }
-
-  @Override
-  public void setScorer(Scorer scorer) throws IOException {
-    super.setScorer(scorer);
-    for (SearchGroup searchGroup : groupMap.values()) {
-      for (FieldComparator fc : searchGroup.sortGroupComparators) {
-        fc.setScorer(scorer);
-      }
-    }
-  }
-
-  @Override
-  public void collect(int doc) throws IOException {
-    matches++;
-    filler.fillValue(doc);
-    SearchGroup group = groupMap.get(mval);
-    if (group == null) {
-      int num = groupMap.size();
-      if (groupMap.size() < nGroups) {
-        SearchGroup sg = new SearchGroup();
-        SortField[] sortGroupFields = groupSort.getSort();
-        sg.sortGroupComparators = new FieldComparator[sortGroupFields.length];
-        sg.sortGroupReversed = new int[sortGroupFields.length];
-        constructComparators(sg.sortGroupComparators, sg.sortGroupReversed, sortGroupFields, 1);
-
-        sg.groupValue = mval.duplicate();
-        sg.comparatorSlot = num++;
-        sg.matches = 1;
-        sg.topDoc = docBase + doc;
-        // sg.topDocScore = scorer.score();
-        for (FieldComparator fc : comparators)
-          fc.copy(sg.comparatorSlot, doc);
-        for (FieldComparator fc : sg.sortGroupComparators) {
-          fc.copy(0, doc);
-          fc.setBottom(0);
-        }
-        groupMap.put(sg.groupValue, sg);
-        return;
-      }
-
-      if (orderedGroups == null) {
-        buildSet();
-      }
-
-      // see if this new group would be competitive if this doc was the top doc
-      for (int i = 0;; i++) {
-        final int c = reversed[i] * comparators[i].compareBottom(doc);
-        if (c < 0) {
-          // Definitely not competitive. So don't even bother to continue
-          return;
-        } else if (c > 0) {
-          // Definitely competitive.
-          break;
-        } else if (i == comparators.length - 1) {
-          // Here c=0. If we're at the last comparator, this doc is not
-          // competitive, since docs are visited in doc Id order, which means
-          // this doc cannot compete with any other document in the queue.
-          return;
-        }
-      }
-
-      // remove current smallest group
-      SearchGroup smallest = orderedGroups.pollLast();
-      groupMap.remove(smallest.groupValue);
-
-      // reuse the removed SearchGroup
-      smallest.groupValue.copy(mval);
-      smallest.matches = 1;
-      smallest.topDoc = docBase + doc;
-      // smallest.topDocScore = scorer.score();
-      for (FieldComparator fc : comparators)
-        fc.copy(smallest.comparatorSlot, doc);
-      for (FieldComparator fc : smallest.sortGroupComparators) {
-        fc.copy(0, doc);
-        fc.setBottom(0);
-      }
-
-      groupMap.put(smallest.groupValue, smallest);
-      orderedGroups.add(smallest);
-
-      int lastSlot = orderedGroups.last().comparatorSlot;
-      for (FieldComparator fc : comparators)
-        fc.setBottom(lastSlot);
-
-      return;
-    }
-
-    //
-    // update existing group
-    //
-
-    group.matches++; // TODO: these aren't valid if the group is every discarded then re-added.  keep track if there have been discards?
-
-    for (int i = 0;; i++) {
-      FieldComparator fc = group.sortGroupComparators[i];
-
-      final int c = group.sortGroupReversed[i] * fc.compareBottom(doc);
-      if (c < 0) {
-        // Definitely not competitive.
-        return;
-      } else if (c > 0) {
-        // Definitely competitive.
-        // Set remaining comparators
-        for (int j = 0; j < group.sortGroupComparators.length; j++) {
-          group.sortGroupComparators[j].copy(0, doc);
-          group.sortGroupComparators[j].setBottom(0);
-        }
-        for (FieldComparator comparator : comparators) comparator.copy(spareSlot, doc);
-        break;
-      } else if (i == group.sortGroupComparators.length - 1) {
-        // Here c=0. If we're at the last comparator, this doc is not
-        // competitive, since docs are visited in doc Id order, which means
-        // this doc cannot compete with any other document in the queue.
-        return;
-      }
-    }
-
-    // remove before updating the group since lookup is done via comparators
-    // TODO: optimize this
-    if (orderedGroups != null)
-      orderedGroups.remove(group);
-
-    group.topDoc = docBase + doc;
-    // group.topDocScore = scorer.score();
-    int tmp = spareSlot; spareSlot = group.comparatorSlot; group.comparatorSlot=tmp;  // swap slots
-
-    // re-add the changed group
-    if (orderedGroups != null)
-      orderedGroups.add(group);
-  }
-
-  @Override
-  public void setNextReader(IndexReader reader, int docBase) throws IOException {
-    super.setNextReader(reader, docBase);
-    this.reader = reader;
-    for (SearchGroup searchGroup : groupMap.values()) {
-      for (int i=0; i<searchGroup.sortGroupComparators.length; i++)
-        searchGroup.sortGroupComparators[i] = searchGroup.sortGroupComparators[i].setNextReader(reader, docBase);
-    }
-  }
-
-}
-
-
 class Phase2GroupCollector extends Collector {
   final HashMap<MutableValue, SearchGroupDocs> groupMap;
   final ValueSource vs;

Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/TestGroupingSearch.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/TestGroupingSearch.java?rev=1037563&r1=1037562&r2=1037563&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/TestGroupingSearch.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/TestGroupingSearch.java Sun Nov 21 21:21:28 2010
@@ -394,10 +394,11 @@ public class TestGroupingSearch extends 
         Comparator<Doc> groupComparator = random.nextBoolean() ? sortComparator : createSort(h.getCore().getSchema(), types, stringSortA);
         String groupSortStr = stringSortA[0];
 
-// TODO: fix/support different groupComparator
-groupComparator = sortComparator;
-groupSortStr = null;
-// rows=1; start=0; group_offset=1; group_limit=1;
+        // since groupSortStr defaults to sortStr, we need to normalize null to "score desc" if
+        // sortStr != null.
+        if (groupSortStr == null && groupSortStr != sortStr) {
+          groupSortStr = "score desc";
+        }
         
          // Test specific case
         if (false) {
@@ -416,10 +417,15 @@ groupSortStr = null;
           Collections.sort(grp.docs, groupComparator);
         }
 
-        // now sort the groups by the first doc in that group
+        // now sort the groups
+
+        // if sort != group.sort, we need to find the max doc by "sort"
+        if (groupComparator != sortComparator) {
+          for (Grp grp : groups.values()) grp.setMaxDoc(sortComparator); 
+        }
 
         List<Grp> sortedGroups = new ArrayList(groups.values());
-        Collections.sort(sortedGroups, createFirstDocComparator(sortComparator));
+        Collections.sort(sortedGroups,  groupComparator==sortComparator ? createFirstDocComparator(sortComparator) : createMaxDocComparator(sortComparator));
 
         Object modelResponse = buildGroupedResult(h.getCore().getSchema(), sortedGroups, start, rows, group_offset, group_limit);
 
@@ -489,6 +495,18 @@ groupSortStr = null;
   }
 
 
+  public static Comparator<Grp> createMaxDocComparator(final Comparator<Doc> docComparator) {
+    return new Comparator<Grp>() {
+      @Override
+      public int compare(Grp o1, Grp o2) {
+        // all groups should have at least one doc
+        Doc d1 = o1.maxDoc;
+        Doc d2 = o2.maxDoc;
+        return docComparator.compare(d1, d2);
+      }
+    };
+  }
+
   public static Comparator<Grp> createFirstDocComparator(final Comparator<Doc> docComparator) {
     return new Comparator<Grp>() {
       @Override
@@ -501,8 +519,6 @@ groupSortStr = null;
     };
   }
 
-
-
   public static Map<Comparable, Grp> groupBy(Collection<Doc> docs, String field) {
     Map<Comparable, Grp> groups = new HashMap<Comparable, Grp>();
     for (Doc doc : docs) {
@@ -536,7 +552,15 @@ groupSortStr = null;
 
   public static class Grp {
     public Comparable groupValue;
-    public List<SolrTestCaseJ4.Doc> docs;
+    public List<Doc> docs;
+    public Doc maxDoc;  // the document highest according to the "sort" param
+
+
+    public void setMaxDoc(Comparator<Doc> comparator) {
+      Doc[] arr = docs.toArray(new Doc[docs.size()]);
+      Arrays.sort(arr, comparator);
+      maxDoc = arr.length > 0 ? arr[0] : null;
+    }
 
     @Override
     public String toString() {