You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2010/08/26 01:54:19 UTC

svn commit: r989406 - in /lucene/dev/trunk/solr: CHANGES.txt src/java/org/apache/solr/request/UnInvertedField.java src/java/org/apache/solr/util/PrimUtils.java src/test/org/apache/solr/util/IntUtilsTest.java

Author: yonik
Date: Wed Aug 25 23:54:19 2010
New Revision: 989406

URL: http://svn.apache.org/viewvc?rev=989406&view=rev
Log:
SOLR-2089: Faceting: order term ords before converting to values

Added:
    lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java   (with props)
    lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java   (with props)
Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=989406&r1=989405&r2=989406&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Aug 25 23:54:19 2010
@@ -270,6 +270,12 @@ Optimizations
   for the first facet request is anywhere from 30% to 32x, depending on how many
   terms are in the field and how many documents match per term.  (yonik) 
 
+* SOLR-2089: Speed up UnInvertedField faceting (facet.method=fc for
+  multi-valued fields) when facet.limit is both high, and a high enough
+  percentage of the number of unique terms in the field.  Extreme cases
+  yield speedups over 3x. (yonik)
+  
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java?rev=989406&r1=989405&r2=989406&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java Wed Aug 25 23:54:19 2010
@@ -37,6 +37,7 @@ import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.TrieField;
 import org.apache.solr.search.*;
+import org.apache.solr.util.PrimUtils;
 import org.apache.solr.util.BoundedTreeSet;
 import org.apache.solr.handler.component.StatsValues;
 import org.apache.solr.handler.component.FieldFacetStats;
@@ -584,7 +585,7 @@ public class UnInvertedField {
             // important if a lot of the counts are repeated (like zero counts would be).
 
             // minimize object creation and speed comparison by creating a long that
-            // encompases both count and term number.
+            // encompasses both count and term number.
             // Since smaller values are kept in the TreeSet, make higher counts smaller.
             //
             //   for equal counts, lower term numbers
@@ -597,15 +598,41 @@ public class UnInvertedField {
           }
         }
         // now select the right page from the results
+
+
+        final int[] tnums = new int[Math.min(queue.size()-off, lim)];
+        final int[] indirect = counts;  // reuse the counts array for the index into the tnums array
+        int tnumCount = 0;
+
         for (Long p : queue) {
           if (--off>=0) continue;
           if (--lim<0) break;
           int c = -(int)(p.longValue() >>> 32);
           //int tnum = 0x7fffffff - (int)p.longValue();  // use if priority queue
           int tnum = (int)p.longValue();
-          String label = getReadableValue(getTermValue(te, tnum), ft, spare);
-          res.add(label, c);
+          indirect[tnumCount] = tnumCount;
+          tnums[tnumCount++] = tnum;
+          // String label = ft.indexedToReadable(getTermText(te, tnum));
+          // add a null label for now... we'll fill it in later.
+          res.add(null, c);
         }
+
+        // now sort the indexes by the term numbers
+        PrimUtils.sort(0, tnumCount, indirect, new PrimUtils.IntComparator() {
+          @Override
+          public int compare(int a, int b) {
+            return tnums[a] - tnums[b];
+          }
+        });
+
+        // convert the term numbers to term values and set as the label
+        for (int i=0; i<tnumCount; i++) {
+          int idx = indirect[i];
+          int tnum = tnums[idx];
+          String label = getReadableValue(getTermValue(te, tnum), ft, spare);          
+          res.setName(idx, label);
+        }
+
       } else {
         // add results in index order
         int i=startTerm;

Added: lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java?rev=989406&view=auto
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java (added)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java Wed Aug 25 23:54:19 2010
@@ -0,0 +1,122 @@
+package org.apache.solr.util;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Utilities for primitive Java data types. */
+public class PrimUtils {
+
+  public static abstract class IntComparator {
+    public abstract int compare(int a, int b);
+    public boolean lessThan(int a, int b) {
+      return compare(a,b) < 0;
+    }
+    public boolean equals(int a, int b) {
+      return compare(a,b) == 0;
+    }
+  }
+
+  /** Sort the integer array from "start" inclusive to "end" exclusive in ascending order,
+   *  using the provided comparator.
+   */
+  public static void sort(int start, int end, int[] array, IntComparator comparator) {
+    // This code was copied from Apache Harmony's Arrays.sort(double[]) and modified
+    // to use a comparator, in addition to other small efficiency enhancements
+    // like replacing divisions with shifts.
+
+    int temp;
+    int length = end - start;
+    if (length < 7) {
+      for (int i = start + 1; i < end; i++) {
+        for (int j = i; j > start && comparator.lessThan(array[j], array[j - 1]); j--) {
+          temp = array[j];
+          array[j] = array[j - 1];
+          array[j - 1] = temp;
+        }
+      }
+      return;
+    }
+    int middle = (start + end) >>> 1;
+    if (length > 7) {
+      int bottom = start;
+      int top = end - 1;
+      if (length > 40) {
+        length >>= 3;
+        bottom = med3(array, bottom, bottom + length, bottom
+            + (length<<1), comparator);
+        middle = med3(array, middle - length, middle, middle + length, comparator);
+        top = med3(array, top - (length<<1), top - length, top, comparator);
+      }
+      middle = med3(array, bottom, middle, top, comparator);
+    }
+    int partionValue = array[middle];
+    int a, b, c, d;
+    a = b = start;
+    c = d = end - 1;
+    while (true) {
+      while (b <= c && !comparator.lessThan(partionValue, array[b])) {
+        if (comparator.equals(array[b], partionValue)) {
+          temp = array[a];
+          array[a++] = array[b];
+          array[b] = temp;
+        }
+        b++;
+      }
+      while (c >= b && !comparator.lessThan(array[c], partionValue)) {
+        if (comparator.equals(array[c], partionValue)) {
+          temp = array[c];
+          array[c] = array[d];
+          array[d--] = temp;
+        }
+        c--;
+      }
+      if (b > c) {
+        break;
+      }
+      temp = array[b];
+      array[b++] = array[c];
+      array[c--] = temp;
+    }
+    length = a - start < b - a ? a - start : b - a;
+    int l = start;
+    int h = b - length;
+    while (length-- > 0) {
+      temp = array[l];
+      array[l++] = array[h];
+      array[h++] = temp;
+    }
+    length = d - c < end - 1 - d ? d - c : end - 1 - d;
+    l = b;
+    h = end - length;
+    while (length-- > 0) {
+      temp = array[l];
+      array[l++] = array[h];
+      array[h++] = temp;
+    }
+    if ((length = b - a) > 0) {
+      sort(start, start + length, array, comparator);
+    }
+    if ((length = d - c) > 0) {
+      sort(end - length, end, array, comparator);
+    }
+  }
+
+  private static int med3(int[] array, int a, int b, int c, IntComparator comparator) {
+    int x = array[a], y = array[b], z = array[c];
+    return comparator.lessThan(x, y) ? (comparator.lessThan(y, z) ? b : (comparator.lessThan(x, z) ? c : a))
+        : (comparator.lessThan(z, y) ? b : (comparator.lessThan(z, x) ? c : a));
+  }
+}

Propchange: lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java?rev=989406&view=auto
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java (added)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java Wed Aug 25 23:54:19 2010
@@ -0,0 +1,54 @@
+package org.apache.solr.util;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+
+import java.util.Arrays;
+import java.util.Random;
+
+public class IntUtilsTest extends LuceneTestCase {
+  Random r = newRandom();
+
+  public void testSort() {
+    int maxSize = 100;
+    int maxVal = 100;
+    int[] a = new int[maxSize];
+    int[] b = new int[maxSize];
+
+    PrimUtils.IntComparator comparator = new PrimUtils.IntComparator() {
+      @Override
+      public int compare(int a, int b) {
+        return b - a;     // sort in reverse
+      }
+    };
+
+    for (int iter=0; iter<100; iter++) {
+      int start = r.nextInt(maxSize+1);
+      int end = start==maxSize ? maxSize : start + r.nextInt(maxSize-start);
+      for (int i=start; i<end; i++) {
+        a[i] = b[i] = r.nextInt(maxVal);
+      }
+      PrimUtils.sort(start, end, a, comparator);
+      Arrays.sort(b, start, end);
+      for (int i=start; i<end; i++) {
+        assertEquals(a[i], b[end-(i-start+1)]);
+      }
+    }
+  }
+
+}
\ No newline at end of file

Propchange: lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL