You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2010/08/26 01:54:19 UTC
svn commit: r989406 - in /lucene/dev/trunk/solr: CHANGES.txt
src/java/org/apache/solr/request/UnInvertedField.java
src/java/org/apache/solr/util/PrimUtils.java
src/test/org/apache/solr/util/IntUtilsTest.java
Author: yonik
Date: Wed Aug 25 23:54:19 2010
New Revision: 989406
URL: http://svn.apache.org/viewvc?rev=989406&view=rev
Log:
SOLR-2089: Faceting: order term ords before converting to values
Added:
lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java (with props)
lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java (with props)
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=989406&r1=989405&r2=989406&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Aug 25 23:54:19 2010
@@ -270,6 +270,12 @@ Optimizations
for the first facet request is anywhere from 30% to 32x, depending on how many
terms are in the field and how many documents match per term. (yonik)
+* SOLR-2089: Speed up UnInvertedField faceting (facet.method=fc for
+ multi-valued fields) when facet.limit is both high, and a high enough
+ percentage of the number of unique terms in the field. Extreme cases
+ yield speedups over 3x. (yonik)
+
+
Bug Fixes
----------------------
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java?rev=989406&r1=989405&r2=989406&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java Wed Aug 25 23:54:19 2010
@@ -37,6 +37,7 @@ import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.TrieField;
import org.apache.solr.search.*;
+import org.apache.solr.util.PrimUtils;
import org.apache.solr.util.BoundedTreeSet;
import org.apache.solr.handler.component.StatsValues;
import org.apache.solr.handler.component.FieldFacetStats;
@@ -584,7 +585,7 @@ public class UnInvertedField {
// important if a lot of the counts are repeated (like zero counts would be).
// minimize object creation and speed comparison by creating a long that
- // encompases both count and term number.
+ // encompasses both count and term number.
// Since smaller values are kept in the TreeSet, make higher counts smaller.
//
// for equal counts, lower term numbers
@@ -597,15 +598,41 @@ public class UnInvertedField {
}
}
// now select the right page from the results
+
+
+ final int[] tnums = new int[Math.min(queue.size()-off, lim)];
+ final int[] indirect = counts; // reuse the counts array for the index into the tnums array
+ int tnumCount = 0;
+
for (Long p : queue) {
if (--off>=0) continue;
if (--lim<0) break;
int c = -(int)(p.longValue() >>> 32);
//int tnum = 0x7fffffff - (int)p.longValue(); // use if priority queue
int tnum = (int)p.longValue();
- String label = getReadableValue(getTermValue(te, tnum), ft, spare);
- res.add(label, c);
+ indirect[tnumCount] = tnumCount;
+ tnums[tnumCount++] = tnum;
+ // String label = ft.indexedToReadable(getTermText(te, tnum));
+ // add a null label for now... we'll fill it in later.
+ res.add(null, c);
}
+
+ // now sort the indexes by the term numbers
+ PrimUtils.sort(0, tnumCount, indirect, new PrimUtils.IntComparator() {
+ @Override
+ public int compare(int a, int b) {
+ return tnums[a] - tnums[b];
+ }
+ });
+
+ // convert the term numbers to term values and set as the label
+ for (int i=0; i<tnumCount; i++) {
+ int idx = indirect[i];
+ int tnum = tnums[idx];
+ String label = getReadableValue(getTermValue(te, tnum), ft, spare);
+ res.setName(idx, label);
+ }
+
} else {
// add results in index order
int i=startTerm;
Added: lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java?rev=989406&view=auto
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java (added)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java Wed Aug 25 23:54:19 2010
@@ -0,0 +1,122 @@
+package org.apache.solr.util;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Utilities for primitive Java data types. */
+public class PrimUtils {
+
+ public static abstract class IntComparator {
+ public abstract int compare(int a, int b);
+ public boolean lessThan(int a, int b) {
+ return compare(a,b) < 0;
+ }
+ public boolean equals(int a, int b) {
+ return compare(a,b) == 0;
+ }
+ }
+
+ /** Sort the integer array from "start" inclusive to "end" exclusive in ascending order,
+ * using the provided comparator.
+ */
+ public static void sort(int start, int end, int[] array, IntComparator comparator) {
+ // This code was copied from Apache Harmony's Arrays.sort(double[]) and modified
+ // to use a comparator, in addition to other small efficiency enhancements
+ // like replacing divisions with shifts.
+
+ int temp;
+ int length = end - start;
+ if (length < 7) {
+ for (int i = start + 1; i < end; i++) {
+ for (int j = i; j > start && comparator.lessThan(array[j], array[j - 1]); j--) {
+ temp = array[j];
+ array[j] = array[j - 1];
+ array[j - 1] = temp;
+ }
+ }
+ return;
+ }
+ int middle = (start + end) >>> 1;
+ if (length > 7) {
+ int bottom = start;
+ int top = end - 1;
+ if (length > 40) {
+ length >>= 3;
+ bottom = med3(array, bottom, bottom + length, bottom
+ + (length<<1), comparator);
+ middle = med3(array, middle - length, middle, middle + length, comparator);
+ top = med3(array, top - (length<<1), top - length, top, comparator);
+ }
+ middle = med3(array, bottom, middle, top, comparator);
+ }
+ int partionValue = array[middle];
+ int a, b, c, d;
+ a = b = start;
+ c = d = end - 1;
+ while (true) {
+ while (b <= c && !comparator.lessThan(partionValue, array[b])) {
+ if (comparator.equals(array[b], partionValue)) {
+ temp = array[a];
+ array[a++] = array[b];
+ array[b] = temp;
+ }
+ b++;
+ }
+ while (c >= b && !comparator.lessThan(array[c], partionValue)) {
+ if (comparator.equals(array[c], partionValue)) {
+ temp = array[c];
+ array[c] = array[d];
+ array[d--] = temp;
+ }
+ c--;
+ }
+ if (b > c) {
+ break;
+ }
+ temp = array[b];
+ array[b++] = array[c];
+ array[c--] = temp;
+ }
+ length = a - start < b - a ? a - start : b - a;
+ int l = start;
+ int h = b - length;
+ while (length-- > 0) {
+ temp = array[l];
+ array[l++] = array[h];
+ array[h++] = temp;
+ }
+ length = d - c < end - 1 - d ? d - c : end - 1 - d;
+ l = b;
+ h = end - length;
+ while (length-- > 0) {
+ temp = array[l];
+ array[l++] = array[h];
+ array[h++] = temp;
+ }
+ if ((length = b - a) > 0) {
+ sort(start, start + length, array, comparator);
+ }
+ if ((length = d - c) > 0) {
+ sort(end - length, end, array, comparator);
+ }
+ }
+
+ private static int med3(int[] array, int a, int b, int c, IntComparator comparator) {
+ int x = array[a], y = array[b], z = array[c];
+ return comparator.lessThan(x, y) ? (comparator.lessThan(y, z) ? b : (comparator.lessThan(x, z) ? c : a))
+ : (comparator.lessThan(z, y) ? b : (comparator.lessThan(z, x) ? c : a));
+ }
+}
Propchange: lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/dev/trunk/solr/src/java/org/apache/solr/util/PrimUtils.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java?rev=989406&view=auto
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java (added)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java Wed Aug 25 23:54:19 2010
@@ -0,0 +1,54 @@
+package org.apache.solr.util;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.LuceneTestCase;
+
+import java.util.Arrays;
+import java.util.Random;
+
+public class IntUtilsTest extends LuceneTestCase {
+ Random r = newRandom();
+
+ public void testSort() {
+ int maxSize = 100;
+ int maxVal = 100;
+ int[] a = new int[maxSize];
+ int[] b = new int[maxSize];
+
+ PrimUtils.IntComparator comparator = new PrimUtils.IntComparator() {
+ @Override
+ public int compare(int a, int b) {
+ return b - a; // sort in reverse
+ }
+ };
+
+ for (int iter=0; iter<100; iter++) {
+ int start = r.nextInt(maxSize+1);
+ int end = start==maxSize ? maxSize : start + r.nextInt(maxSize-start);
+ for (int i=start; i<end; i++) {
+ a[i] = b[i] = r.nextInt(maxVal);
+ }
+ PrimUtils.sort(start, end, a, comparator);
+ Arrays.sort(b, start, end);
+ for (int i=start; i<end; i++) {
+ assertEquals(a[i], b[end-(i-start+1)]);
+ }
+ }
+ }
+
+}
\ No newline at end of file
Propchange: lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/dev/trunk/solr/src/test/org/apache/solr/util/IntUtilsTest.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL