You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/12/18 21:40:36 UTC

svn commit: r1552086 - in /lucene/dev/branches/lucene5339/lucene: ./ facet/src/java/org/apache/lucene/facet/ facet/src/test/org/apache/lucene/facet/

Author: mikemccand
Date: Wed Dec 18 20:40:35 2013
New Revision: 1552086

URL: http://svn.apache.org/r1552086
Log:
LUCENE-5371: faster range faceting using segment trees

Added:
    lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeCounter.java   (with props)
Modified:
    lucene/dev/branches/lucene5339/lucene/CHANGES.txt
    lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java
    lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java
    lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java
    lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java
    lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/Range.java
    lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java
    lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/TestRangeFacetCounts.java

Modified: lucene/dev/branches/lucene5339/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/CHANGES.txt?rev=1552086&r1=1552085&r2=1552086&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene5339/lucene/CHANGES.txt Wed Dec 18 20:40:35 2013
@@ -68,6 +68,11 @@ New Features
 * LUCENE-5336: Add SimpleQueryParser: parser for human-entered queries.
   (Jack Conradson via Robert Muir)
 
+* LUCENE-5371: Speed up Lucene range faceting from O(N) per hit to
+  O(log(N)) per hit using segment trees; this only really starts to
+  matter in practice if the number of ranges is over 10 or so.  (Mike
+  McCandless)
+
 Build
 
 * LUCENE-5217: Maven config: get dependencies from Ant+Ivy config; disable

Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java?rev=1552086&r1=1552085&r2=1552086&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRange.java Wed Dec 18 20:40:35 2013
@@ -28,6 +28,7 @@ import org.apache.lucene.search.DocIdSet
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.NumericUtils;
 
 /** Represents a range over double values. */
 public final class DoubleRange extends Range {
@@ -40,10 +41,10 @@ public final class DoubleRange extends R
   public final boolean maxInclusive;
 
   /** Create a DoubleRange. */
-  public DoubleRange(String label, double min, boolean minInclusive, double max, boolean maxInclusive) {
+  public DoubleRange(String label, double minIn, boolean minInclusive, double maxIn, boolean maxInclusive) {
     super(label);
-    this.min = min;
-    this.max = max;
+    this.min = minIn;
+    this.max = maxIn;
     this.minInclusive = minInclusive;
     this.maxInclusive = maxInclusive;
 
@@ -56,7 +57,7 @@ public final class DoubleRange extends R
       throw new IllegalArgumentException("min cannot be NaN");
     }
     if (!minInclusive) {
-      min = Math.nextUp(min);
+      minIn = Math.nextUp(minIn);
     }
 
     if (Double.isNaN(max)) {
@@ -64,17 +65,32 @@ public final class DoubleRange extends R
     }
     if (!maxInclusive) {
       // Why no Math.nextDown?
-      max = Math.nextAfter(max, Double.NEGATIVE_INFINITY);
+      maxIn = Math.nextAfter(maxIn, Double.NEGATIVE_INFINITY);
     }
 
-    this.minIncl = min;
-    this.maxIncl = max;
+    if (minIn > maxIn) {
+      failNoMatch();
+    }
+
+    this.minIncl = minIn;
+    this.maxIncl = maxIn;
   }
 
   public boolean accept(double value) {
     return value >= minIncl && value <= maxIncl;
   }
 
+  LongRange toLongRange() {
+    return new LongRange(label,
+                         NumericUtils.doubleToSortableLong(minIncl), true,
+                         NumericUtils.doubleToSortableLong(maxIncl), true);
+  }
+
+  @Override
+  public String toString() {
+    return "DoubleRange(" + minIncl + " to " + maxIncl + ")";
+  }
+
   /** Returns a new {@link Filter} accepting only documents
    *  in this range.  Note that this filter is not
    *  efficient: it's a linear scan of all docs, testing

Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java?rev=1552086&r1=1552085&r2=1552086&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/DoubleRangeFacetCounts.java Wed Dec 18 20:40:35 2013
@@ -28,14 +28,16 @@ import org.apache.lucene.queries.functio
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.valuesource.DoubleFieldSource;
 import org.apache.lucene.queries.function.valuesource.FloatFieldSource; // javadocs
+import org.apache.lucene.util.NumericUtils;
 
 /** {@link Facets} implementation that computes counts for
  *  dynamic double ranges from a provided {@link
  *  ValueSource}, using {@link FunctionValues#doubleVal}.  Use
  *  this for dimensions that change in real-time (e.g. a
  *  relative time based dimension like "Past day", "Past 2
- *  days", etc.) or that change for each user (e.g. a
- *  distance dimension like "< 1 km", "< 2 km", etc.).
+ *  days", etc.) or that change for each request (e.g.
+ *  distance from the user's location, "< 1 km", "< 2 km",
+ *  etc.).
  *
  *  <p> If you had indexed your field using {@link
  *  FloatDocValuesField} then pass {@link FloatFieldSource}
@@ -64,6 +66,16 @@ public class DoubleRangeFacetCounts exte
 
     DoubleRange[] ranges = (DoubleRange[]) this.ranges;
 
+    LongRange[] longRanges = new LongRange[ranges.length];
+    for(int i=0;i<ranges.length;i++) {
+      DoubleRange range = ranges[i];
+      longRanges[i] =  new LongRange(range.label,
+                                     NumericUtils.doubleToSortableLong(range.minIncl), true,
+                                     NumericUtils.doubleToSortableLong(range.maxIncl), true);
+    }
+
+    LongRangeCounter counter = new LongRangeCounter(longRanges);
+
     // Compute min & max over all ranges:
     double minIncl = Double.POSITIVE_INFINITY;
     double maxIncl = Double.NEGATIVE_INFINITY;
@@ -72,9 +84,7 @@ public class DoubleRangeFacetCounts exte
       maxIncl = Math.max(maxIncl, range.maxIncl);
     }
 
-    // TODO: test if this is faster (in the past it was
-    // faster to do MatchingDocs on the inside) ... see
-    // patches on LUCENE-4965):
+    int missingCount = 0;
     for (MatchingDocs hits : matchingDocs) {
       FunctionValues fv = valueSource.getValues(Collections.emptyMap(), hits.context);
       final int length = hits.bits.length();
@@ -83,27 +93,15 @@ public class DoubleRangeFacetCounts exte
       while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) {
         // Skip missing docs:
         if (fv.exists(doc)) {
-          
-          double v = fv.doubleVal(doc);
-          if (v < minIncl || v > maxIncl) {
-            doc++;
-            continue;
-          }
-
-          // TODO: if all ranges are non-overlapping, we
-          // should instead do a bin-search up front
-          // (really, a specialized case of the interval
-          // tree)
-          // TODO: use interval tree instead of linear search:
-          for (int j = 0; j < ranges.length; j++) {
-            if (ranges[j].accept(v)) {
-              counts[j]++;
-            }
-          }
+          counter.add(NumericUtils.doubleToSortableLong(fv.doubleVal(doc)));
+        } else {
+          missingCount++;
         }
-
         doc++;
       }
     }
+
+    missingCount += counter.fillCounts(counts);
+    totCount -= missingCount;
   }
 }

Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java?rev=1552086&r1=1552085&r2=1552086&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRange.java Wed Dec 18 20:40:35 2013
@@ -43,29 +43,46 @@ public final class LongRange extends Ran
   // Double/FloatRange too)
 
   /** Create a LongRange. */
-  public LongRange(String label, long min, boolean minInclusive, long max, boolean maxInclusive) {
+  public LongRange(String label, long minIn, boolean minInclusive, long maxIn, boolean maxInclusive) {
     super(label);
-    this.min = min;
-    this.max = max;
+    this.min = minIn;
+    this.max = maxIn;
     this.minInclusive = minInclusive;
     this.maxInclusive = maxInclusive;
 
-    if (!minInclusive && min != Long.MAX_VALUE) {
-      min++;
+    if (!minInclusive) {
+      if (minIn != Long.MAX_VALUE) {
+        minIn++;
+      } else {
+        failNoMatch();
+      }
+    }
+
+    if (!maxInclusive) {
+      if (maxIn != Long.MIN_VALUE) {
+        maxIn--;
+      } else {
+        failNoMatch();
+      }
     }
 
-    if (!maxInclusive && max != Long.MIN_VALUE) {
-      max--;
+    if (minIn > maxIn) {
+      failNoMatch();
     }
 
-    this.minIncl = min;
-    this.maxIncl = max;
+    this.minIncl = minIn;
+    this.maxIncl = maxIn;
   }
 
   public boolean accept(long value) {
     return value >= minIncl && value <= maxIncl;
   }
 
+  @Override
+  public String toString() {
+    return "LongRange(" + minIncl + " to " + maxIncl + ")";
+  }
+
   /** Returns a new {@link Filter} accepting only documents
    *  in this range.  Note that this filter is not
    *  efficient: it's a linear scan of all docs, testing

Added: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeCounter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeCounter.java?rev=1552086&view=auto
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeCounter.java (added)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeCounter.java Wed Dec 18 20:40:35 2013
@@ -0,0 +1,318 @@
+package org.apache.lucene.facet;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/** Counts how many times each range was seen;
+ *  per-hit it's just a binary search ({@link #add})
+ *  against the elementary intervals, and in the end we
+ *  rollup back to the original ranges. */
+
+final class LongRangeCounter {
+
+  final LongRangeNode root;
+  final long[] boundaries;
+  final int[] leafCounts;
+
+  // Used during rollup
+  private int leafUpto;
+  private int missingCount;
+
+  public LongRangeCounter(LongRange[] ranges) {
+    // Maps all range inclusive endpoints to int flags; 1
+    // = start of interval, 2 = end of interval.  We need to
+    // track the start vs end case separately because if a
+    // given point is both, then it must be its own
+    // elementary interval:
+    Map<Long,Integer> endsMap = new HashMap<Long,Integer>();
+
+    endsMap.put(Long.MIN_VALUE, 1);
+    endsMap.put(Long.MAX_VALUE, 2);
+
+    for(LongRange range : ranges) {
+      Integer cur = endsMap.get(range.minIncl);
+      if (cur == null) {
+        endsMap.put(range.minIncl, 1);
+      } else {
+        endsMap.put(range.minIncl, cur.intValue() | 1);
+      }
+      cur = endsMap.get(range.maxIncl);
+      if (cur == null) {
+        endsMap.put(range.maxIncl, 2);
+      } else {
+        endsMap.put(range.maxIncl, cur.intValue() | 2);
+      }
+    }
+
+    List<Long> endsList = new ArrayList<Long>(endsMap.keySet());
+    Collections.sort(endsList);
+
+    // Build elementaryIntervals (a 1D Venn diagram):
+    List<InclusiveRange> elementaryIntervals = new ArrayList<InclusiveRange>();
+    int upto0 = 1;
+    long v = endsList.get(0);
+    long prev;
+    if (endsMap.get(v) == 3) {
+      elementaryIntervals.add(new InclusiveRange(v, v));
+      prev = v+1;
+    } else {
+      prev = v;
+    }
+
+    while (upto0 < endsList.size()) {
+      v = endsList.get(upto0);
+      int flags = endsMap.get(v);
+      //System.out.println("  v=" + v + " flags=" + flags);
+      if (flags == 3) {
+        // This point is both an end and a start; we need to
+        // separate it:
+        if (v > prev) {
+          elementaryIntervals.add(new InclusiveRange(prev, v-1));
+        }
+        elementaryIntervals.add(new InclusiveRange(v, v));
+        prev = v+1;
+      } else if (flags == 1) {
+        // This point is only the start of an interval;
+        // attach it to next interval:
+        if (v > prev) {
+          elementaryIntervals.add(new InclusiveRange(prev, v-1));
+        }
+        prev = v;
+      } else {
+        assert flags == 2;
+        // This point is only the end of an interval; attach
+        // it to last interval:
+        elementaryIntervals.add(new InclusiveRange(prev, v));
+        prev = v+1;
+      }
+      //System.out.println("    ints=" + elementaryIntervals);
+      upto0++;
+    }
+
+    // Build binary tree on top of intervals:
+    root = split(0, elementaryIntervals.size(), elementaryIntervals);
+
+    // Set outputs, so we know which range to output for
+    // each node in the tree:
+    for(int i=0;i<ranges.length;i++) {
+      root.addOutputs(i, ranges[i]);
+    }
+
+    // Set boundaries (ends of each elementary interval):
+    boundaries = new long[elementaryIntervals.size()];
+    for(int i=0;i<boundaries.length;i++) {
+      boundaries[i] = elementaryIntervals.get(i).end;
+    }
+
+    leafCounts = new int[boundaries.length];
+
+    //System.out.println("ranges: " + Arrays.toString(ranges));
+    //System.out.println("intervals: " + elementaryIntervals);
+    //System.out.println("boundaries: " + Arrays.toString(boundaries));
+    //System.out.println("root:\n" + root);
+  }
+
+  public void add(long v) {
+    //System.out.println("add v=" + v);
+
+    // NOTE: this works too, but it's ~6% slower on a simple
+    // test with a high-freq TermQuery w/ range faceting on
+    // wikimediumall:
+    /*
+    int index = Arrays.binarySearch(boundaries, v);
+    if (index < 0) {
+      index = -index-1;
+    }
+    leafCounts[index]++;
+    */
+
+    // Binary search to find matched elementary range; we
+    // are guaranteed to find a match because the last
+    // boundary is Long.MAX_VALUE:
+
+    int lo = 0;
+    int hi = boundaries.length - 1;
+    int count = 0;
+    while (true) {
+      int mid = (lo + hi) >>> 1;
+      //System.out.println("  cycle lo=" + lo + " hi=" + hi + " mid=" + mid + " boundary=" + boundaries[mid] + " to " + boundaries[mid+1]);
+      if (v <= boundaries[mid]) {
+        if (mid == 0) {
+          leafCounts[0]++;
+          return;
+        } else {
+          hi = mid - 1;
+        }
+      } else if (v > boundaries[mid+1]) {
+        lo = mid + 1;
+      } else {
+        leafCounts[mid+1]++;
+        //System.out.println("  incr @ " + (mid+1) + "; now " + leafCounts[mid+1]);
+        return;
+      }
+    }
+  }
+
+  /** Fills counts corresponding to the original input
+   *  ranges, returning the missing count (how many hits
+   *  didn't match any ranges). */
+  public int fillCounts(int[] counts) {
+    //System.out.println("  rollup");
+    missingCount = 0;
+    leafUpto = 0;
+    rollup(root, counts, false);
+    return missingCount;
+  }
+
+  private int rollup(LongRangeNode node, int[] counts, boolean sawOutputs) {
+    int count;
+    sawOutputs |= node.outputs != null;
+    if (node.left != null) {
+      count = rollup(node.left, counts, sawOutputs);
+      count += rollup(node.right, counts, sawOutputs);
+    } else {
+      // Leaf:
+      count = leafCounts[leafUpto];
+      leafUpto++;
+      if (!sawOutputs) {
+        // This is a missing count (no output ranges were
+        // seen "above" us):
+        missingCount += count;
+      }
+    }
+    if (node.outputs != null) {
+      for(int rangeIndex : node.outputs) {
+        counts[rangeIndex] += count;
+      }
+    }
+    //System.out.println("  rollup node=" + node.start + " to " + node.end + ": count=" + count);
+    return count;
+  }
+
+  private static LongRangeNode split(int start, int end, List<InclusiveRange> elementaryIntervals) {
+    if (start == end-1) {
+      // leaf
+      InclusiveRange range = elementaryIntervals.get(start);
+      return new LongRangeNode(range.start, range.end, null, null, start);
+    } else {
+      int mid = (start + end) >>> 1;
+      LongRangeNode left = split(start, mid, elementaryIntervals);
+      LongRangeNode right = split(mid, end, elementaryIntervals);
+      return new LongRangeNode(left.start, right.end, left, right, -1);
+    }
+  }
+
+  private static final class InclusiveRange {
+    public final long start;
+    public final long end;
+
+    public InclusiveRange(long start, long end) {
+      assert end >= start;
+      this.start = start;
+      this.end = end;
+    }
+
+    @Override
+    public String toString() {
+      return start + " to " + end;
+    }
+  }
+
+  /** Holds one node of the segment tree. */
+  public static final class LongRangeNode {
+    final LongRangeNode left;
+    final LongRangeNode right;
+
+    // Our range, inclusive:
+    final long start;
+    final long end;
+
+    // If we are a leaf, the index into elementary ranges that
+    // we point to:
+    final int leafIndex;
+
+    // Which range indices to output when a query goes
+    // through this node:
+    List<Integer> outputs;
+
+    public LongRangeNode(long start, long end, LongRangeNode left, LongRangeNode right, int leafIndex) {
+      this.start = start;
+      this.end = end;
+      this.left = left;
+      this.right = right;
+      this.leafIndex = leafIndex;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder sb = new StringBuilder();
+      toString(sb, 0);
+      return sb.toString();
+    }
+
+    static void indent(StringBuilder sb, int depth) {
+      for(int i=0;i<depth;i++) {
+        sb.append("  ");
+      }
+    }
+
+    /** Recursively assigns range outputs to each node. */
+    void addOutputs(int index, LongRange range) {
+      if (start >= range.minIncl && end <= range.maxIncl) {
+        // Our range is fully included in the incoming
+        // range; add to our output list:
+        if (outputs == null) {
+          outputs = new ArrayList<Integer>();
+        }
+        outputs.add(index);
+      } else if (left != null) {
+        assert right != null;
+        // Recurse:
+        left.addOutputs(index, range);
+        right.addOutputs(index, range);
+      }
+    }
+
+    void toString(StringBuilder sb, int depth) {
+      indent(sb, depth);
+      if (left == null) {
+        assert right == null;
+        sb.append("leaf: " + start + " to " + end);
+      } else {
+        sb.append("node: " + start + " to " + end);
+      }
+      if (outputs != null) {
+        sb.append(" outputs=");
+        sb.append(outputs);
+      }
+      sb.append('\n');
+
+      if (left != null) {
+        assert right != null;
+        left.toString(sb, depth+1);
+        right.toString(sb, depth+1);
+      }
+    }
+  }
+}

Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java?rev=1552086&r1=1552085&r2=1552086&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/LongRangeFacetCounts.java Wed Dec 18 20:40:35 2013
@@ -31,8 +31,9 @@ import org.apache.lucene.queries.functio
  *  using {@link FunctionValues#longVal}.  Use
  *  this for dimensions that change in real-time (e.g. a
  *  relative time based dimension like "Past day", "Past 2
- *  days", etc.) or that change for each user (e.g. a
- *  distance dimension like "< 1 km", "< 2 km", etc.).
+ *  days", etc.) or that change for each request (e.g. 
+ *  distance from the user's location, "< 1 km", "< 2 km",
+ *  etc.).
  *
  *  @lucene.experimental */
 public class LongRangeFacetCounts extends RangeFacetCounts {
@@ -62,9 +63,9 @@ public class LongRangeFacetCounts extend
       maxIncl = Math.max(maxIncl, range.maxIncl);
     }
 
-    // TODO: test if this is faster (in the past it was
-    // faster to do MatchingDocs on the inside) ... see
-    // patches on LUCENE-4965):
+    LongRangeCounter counter = new LongRangeCounter(ranges);
+
+    int missingCount = 0;
     for (MatchingDocs hits : matchingDocs) {
       FunctionValues fv = valueSource.getValues(Collections.emptyMap(), hits.context);
       final int length = hits.bits.length();
@@ -73,27 +74,20 @@ public class LongRangeFacetCounts extend
       while (doc < length && (doc = hits.bits.nextSetBit(doc)) != -1) {
         // Skip missing docs:
         if (fv.exists(doc)) {
-          
-          long v = fv.longVal(doc);
-          if (v < minIncl || v > maxIncl) {
-            doc++;
-            continue;
-          }
-
-          // TODO: if all ranges are non-overlapping, we
-          // should instead do a bin-search up front
-          // (really, a specialized case of the interval
-          // tree)
-          // TODO: use interval tree instead of linear search:
-          for (int j = 0; j < ranges.length; j++) {
-            if (ranges[j].accept(v)) {
-              counts[j]++;
-            }
-          }
+          counter.add(fv.longVal(doc));
+        } else {
+          missingCount++;
         }
 
         doc++;
       }
     }
+    
+    int x = counter.fillCounts(counts);
+
+    missingCount += x;
+
+    //System.out.println("totCount " + totCount + " missingCount " + counter.missingCount);
+    totCount -= missingCount;
   }
 }

Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/Range.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/Range.java?rev=1552086&r1=1552085&r2=1552086&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/Range.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/Range.java Wed Dec 18 20:40:35 2013
@@ -24,6 +24,13 @@ public abstract class Range {
   public final String label;
 
   protected Range(String label) {
+    if (label == null) {
+      throw new NullPointerException("label cannot be null");
+    }
     this.label = label;
   }
+
+  protected void failNoMatch() {
+    throw new IllegalArgumentException("range \"" + label + "\" matches nothing");
+  }
 }

Modified: lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java?rev=1552086&r1=1552085&r2=1552086&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/java/org/apache/lucene/facet/RangeFacetCounts.java Wed Dec 18 20:40:35 2013
@@ -39,9 +39,6 @@ abstract class RangeFacetCounts extends 
     counts = new int[ranges.length];
   }
 
-  // nocommit all args are ... unused ... this doesn't "fit"
-  // very well:
-
   @Override
   public FacetResult getTopChildren(int topN, String dim, String... path) {
     if (dim.equals(field) == false) {

Modified: lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/TestRangeFacetCounts.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/TestRangeFacetCounts.java?rev=1552086&r1=1552085&r2=1552086&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/TestRangeFacetCounts.java (original)
+++ lucene/dev/branches/lucene5339/lucene/facet/src/test/org/apache/lucene/facet/TestRangeFacetCounts.java Wed Dec 18 20:40:35 2013
@@ -62,6 +62,8 @@ public class TestRangeFacetCounts extend
       field.setLongValue(l);
       w.addDocument(doc);
     }
+
+    // Also add Long.MAX_VALUE
     field.setLongValue(Long.MAX_VALUE);
     w.addDocument(doc);
 
@@ -78,9 +80,107 @@ public class TestRangeFacetCounts extend
         new LongRange("over 90", 90L, false, 100L, false),
         new LongRange("90 or above", 90L, true, 100L, false),
         new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, true));
+
+    FacetResult result = facets.getTopChildren(10, "field");
+    assertEquals("dim=field path=[] value=22 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (1)\n",
+                 result.toString());
     
+    r.close();
+    d.close();
+  }
+
+  public void testUselessRange() {
+    try {
+      new LongRange("useless", 7, true, 6, true);
+      fail("did not hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+    try {
+      new LongRange("useless", 7, true, 7, false);
+      fail("did not hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+    try {
+      new DoubleRange("useless", 7.0, true, 6.0, true);
+      fail("did not hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+    try {
+      new DoubleRange("useless", 7.0, true, 7.0, false);
+      fail("did not hit expected exception");
+    } catch (IllegalArgumentException iae) {
+      // expected
+    }
+  }
+
+  public void testLongMinMax() throws Exception {
+
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), d);
+    Document doc = new Document();
+    NumericDocValuesField field = new NumericDocValuesField("field", 0L);
+    doc.add(field);
+    field.setLongValue(Long.MIN_VALUE);
+    w.addDocument(doc);
+    field.setLongValue(0);
+    w.addDocument(doc);
+    field.setLongValue(Long.MAX_VALUE);
+    w.addDocument(doc);
+
+    IndexReader r = w.getReader();
+    w.close();
+
+    FacetsCollector fc = new FacetsCollector();
+    IndexSearcher s = newSearcher(r);
+    s.search(new MatchAllDocsQuery(), fc);
+
+    Facets facets = new LongRangeFacetCounts("field", fc,
+        new LongRange("min", Long.MIN_VALUE, true, Long.MIN_VALUE, true),
+        new LongRange("max", Long.MAX_VALUE, true, Long.MAX_VALUE, true),
+        new LongRange("all0", Long.MIN_VALUE, true, Long.MAX_VALUE, true),
+        new LongRange("all1", Long.MIN_VALUE, false, Long.MAX_VALUE, true),
+        new LongRange("all2", Long.MIN_VALUE, true, Long.MAX_VALUE, false),
+        new LongRange("all3", Long.MIN_VALUE, false, Long.MAX_VALUE, false));
+
     FacetResult result = facets.getTopChildren(10, "field");
-    assertEquals("dim=field path=[] value=101 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (1)\n",
+    assertEquals("dim=field path=[] value=3 childCount=6\n  min (1)\n  max (1)\n  all0 (3)\n  all1 (2)\n  all2 (2)\n  all3 (1)\n",
+                 result.toString());
+    
+    r.close();
+    d.close();
+  }
+
+  public void testOverlappedEndStart() throws Exception {
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), d);
+    Document doc = new Document();
+    NumericDocValuesField field = new NumericDocValuesField("field", 0L);
+    doc.add(field);
+    for(long l=0;l<100;l++) {
+      field.setLongValue(l);
+      w.addDocument(doc);
+    }
+    field.setLongValue(Long.MAX_VALUE);
+    w.addDocument(doc);
+
+    IndexReader r = w.getReader();
+    w.close();
+
+    FacetsCollector fc = new FacetsCollector();
+    IndexSearcher s = newSearcher(r);
+    s.search(new MatchAllDocsQuery(), fc);
+
+    Facets facets = new LongRangeFacetCounts("field", fc,
+        new LongRange("0-10", 0L, true, 10L, true),
+        new LongRange("10-20", 10L, true, 20L, true),
+        new LongRange("20-30", 20L, true, 30L, true),
+        new LongRange("30-40", 30L, true, 40L, true));
+    
+    FacetResult result = facets.getTopChildren(10, "field");
+    assertEquals("dim=field path=[] value=41 childCount=4\n  0-10 (11)\n  10-20 (11)\n  20-30 (11)\n  30-40 (11)\n",
                  result.toString());
     
     r.close();
@@ -162,7 +262,7 @@ public class TestRangeFacetCounts extend
 
     assertEquals(100, dsr.hits.totalHits);
     assertEquals("dim=dim path=[] value=100 childCount=2\n  b (75)\n  a (25)\n", dsr.facets.getTopChildren(10, "dim").toString());
-    assertEquals("dim=field path=[] value=100 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n",
+    assertEquals("dim=field path=[] value=21 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n",
                  dsr.facets.getTopChildren(10, "field").toString());
 
     // Second search, drill down on dim=b:
@@ -172,7 +272,7 @@ public class TestRangeFacetCounts extend
 
     assertEquals(75, dsr.hits.totalHits);
     assertEquals("dim=dim path=[] value=100 childCount=2\n  b (75)\n  a (25)\n", dsr.facets.getTopChildren(10, "dim").toString());
-    assertEquals("dim=field path=[] value=75 childCount=5\n  less than 10 (7)\n  less than or equal to 10 (8)\n  over 90 (7)\n  90 or above (8)\n  over 1000 (0)\n",
+    assertEquals("dim=field path=[] value=16 childCount=5\n  less than 10 (7)\n  less than or equal to 10 (8)\n  over 90 (7)\n  90 or above (8)\n  over 1000 (0)\n",
                  dsr.facets.getTopChildren(10, "field").toString());
 
     // Third search, drill down on "less than or equal to 10":
@@ -182,7 +282,7 @@ public class TestRangeFacetCounts extend
 
     assertEquals(11, dsr.hits.totalHits);
     assertEquals("dim=dim path=[] value=11 childCount=2\n  b (8)\n  a (3)\n", dsr.facets.getTopChildren(10, "dim").toString());
-    assertEquals("dim=field path=[] value=100 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n",
+    assertEquals("dim=field path=[] value=21 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n",
                  dsr.facets.getTopChildren(10, "field").toString());
     IOUtils.close(tw, tr, td, w, r, d);
   }
@@ -211,7 +311,7 @@ public class TestRangeFacetCounts extend
         new DoubleRange("90 or above", 90.0, true, 100.0, false),
         new DoubleRange("over 1000", 1000.0, false, Double.POSITIVE_INFINITY, false));
                                          
-    assertEquals("dim=field path=[] value=100 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n",
+    assertEquals("dim=field path=[] value=21 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n",
                  facets.getTopChildren(10, "field").toString());
 
     IOUtils.close(w, r, d);
@@ -242,7 +342,7 @@ public class TestRangeFacetCounts extend
         new DoubleRange("90 or above", 90.0f, true, 100.0f, false),
         new DoubleRange("over 1000", 1000.0f, false, Double.POSITIVE_INFINITY, false));
     
-    assertEquals("dim=field path=[] value=100 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n",
+    assertEquals("dim=field path=[] value=21 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (0)\n",
                  facets.getTopChildren(10, "field").toString());
     
     IOUtils.close(w, r, d);
@@ -253,6 +353,9 @@ public class TestRangeFacetCounts extend
     RandomIndexWriter w = new RandomIndexWriter(random(), dir);
 
     int numDocs = atLeast(1000);
+    if (VERBOSE) {
+      System.out.println("TEST: numDocs=" + numDocs);
+    }
     long[] values = new long[numDocs];
     for(int i=0;i<numDocs;i++) {
       Document doc = new Document();
@@ -272,20 +375,53 @@ public class TestRangeFacetCounts extend
       if (VERBOSE) {
         System.out.println("TEST: iter=" + iter);
       }
-      int numRange = _TestUtil.nextInt(random(), 1, 5);
+      int numRange = _TestUtil.nextInt(random(), 1, 100);
       LongRange[] ranges = new LongRange[numRange];
       int[] expectedCounts = new int[numRange];
       for(int rangeID=0;rangeID<numRange;rangeID++) {
-        long min = random().nextLong();
-        long max = random().nextLong();
+        long min;
+        if (rangeID > 0 && random().nextInt(10) == 7) {
+          // Use an existing boundary:
+          LongRange prevRange = ranges[random().nextInt(rangeID)];
+          if (random().nextBoolean()) {
+            min = prevRange.min;
+          } else {
+            min = prevRange.max;
+          }
+        } else {
+          min = random().nextLong();
+        }
+        long max;
+        if (rangeID > 0 && random().nextInt(10) == 7) {
+          // Use an existing boundary:
+          LongRange prevRange = ranges[random().nextInt(rangeID)];
+          if (random().nextBoolean()) {
+            max = prevRange.min;
+          } else {
+            max = prevRange.max;
+          }
+        } else {
+          max = random().nextLong();
+        }
+
         if (min > max) {
           long x = min;
           min = max;
           max = x;
         }
-        boolean minIncl = random().nextBoolean();
-        boolean maxIncl = random().nextBoolean();
+        boolean minIncl;
+        boolean maxIncl;
+        if (min == max) {
+          minIncl = true;
+          maxIncl = true;
+        } else {
+          minIncl = random().nextBoolean();
+          maxIncl = random().nextBoolean();
+        }
         ranges[rangeID] = new LongRange("r" + rangeID, min, minIncl, max, maxIncl);
+        if (VERBOSE) {
+          System.out.println("  range " + rangeID + ": " + ranges[rangeID]);      
+        }
 
         // Do "slow but hopefully correct" computation of
         // expected count:
@@ -360,15 +496,46 @@ public class TestRangeFacetCounts extend
       DoubleRange[] ranges = new DoubleRange[numRange];
       int[] expectedCounts = new int[numRange];
       for(int rangeID=0;rangeID<numRange;rangeID++) {
-        double min = random().nextDouble();
-        double max = random().nextDouble();
+        double min;
+        if (rangeID > 0 && random().nextInt(10) == 7) {
+          // Use an existing boundary:
+          DoubleRange prevRange = ranges[random().nextInt(rangeID)];
+          if (random().nextBoolean()) {
+            min = prevRange.min;
+          } else {
+            min = prevRange.max;
+          }
+        } else {
+          min = random().nextDouble();
+        }
+        double max;
+        if (rangeID > 0 && random().nextInt(10) == 7) {
+          // Use an existing boundary:
+          DoubleRange prevRange = ranges[random().nextInt(rangeID)];
+          if (random().nextBoolean()) {
+            max = prevRange.min;
+          } else {
+            max = prevRange.max;
+          }
+        } else {
+          max = random().nextDouble();
+        }
+
         if (min > max) {
           double x = min;
           min = max;
           max = x;
         }
-        boolean minIncl = random().nextBoolean();
-        boolean maxIncl = random().nextBoolean();
+
+        boolean minIncl;
+        boolean maxIncl;
+        if (min == max) {
+          minIncl = true;
+          maxIncl = true;
+        } else {
+          minIncl = random().nextBoolean();
+          maxIncl = random().nextBoolean();
+        }
         ranges[rangeID] = new DoubleRange("r" + rangeID, min, minIncl, max, maxIncl);
 
         // Do "slow but hopefully correct" computation of
@@ -444,15 +611,46 @@ public class TestRangeFacetCounts extend
       DoubleRange[] ranges = new DoubleRange[numRange];
       int[] expectedCounts = new int[numRange];
       for(int rangeID=0;rangeID<numRange;rangeID++) {
-        double min = random().nextDouble();
-        double max = random().nextDouble();
+        double min;
+        if (rangeID > 0 && random().nextInt(10) == 7) {
+          // Use an existing boundary:
+          DoubleRange prevRange = ranges[random().nextInt(rangeID)];
+          if (random().nextBoolean()) {
+            min = prevRange.min;
+          } else {
+            min = prevRange.max;
+          }
+        } else {
+          min = random().nextDouble();
+        }
+        double max;
+        if (rangeID > 0 && random().nextInt(10) == 7) {
+          // Use an existing boundary:
+          DoubleRange prevRange = ranges[random().nextInt(rangeID)];
+          if (random().nextBoolean()) {
+            max = prevRange.min;
+          } else {
+            max = prevRange.max;
+          }
+        } else {
+          max = random().nextDouble();
+        }
+
         if (min > max) {
           double x = min;
           min = max;
           max = x;
         }
-        boolean minIncl = random().nextBoolean();
-        boolean maxIncl = random().nextBoolean();
+
+        boolean minIncl;
+        boolean maxIncl;
+        if (min == max) {
+          minIncl = true;
+          maxIncl = true;
+        } else {
+          minIncl = random().nextBoolean();
+          maxIncl = random().nextBoolean();
+        }
         ranges[rangeID] = new DoubleRange("r" + rangeID, min, minIncl, max, maxIncl);
 
         // Do "slow but hopefully correct" computation of
@@ -531,7 +729,7 @@ public class TestRangeFacetCounts extend
         new LongRange("90 or above", 90L, true, 100L, false),
         new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, false));
     
-    assertEquals("dim=field path=[] value=100 childCount=5\n  less than 10 (8)\n  less than or equal to 10 (8)\n  over 90 (8)\n  90 or above (8)\n  over 1000 (0)\n",
+    assertEquals("dim=field path=[] value=16 childCount=5\n  less than 10 (8)\n  less than or equal to 10 (8)\n  over 90 (8)\n  90 or above (8)\n  over 1000 (0)\n",
                  facets.getTopChildren(10, "field").toString());
 
     IOUtils.close(w, r, d);