You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2020/05/11 16:34:14 UTC
[lucene-solr] branch master updated: LUCENE-7889: Allow grouping on
Double/LongValuesSource (#1484)
This is an automated email from the ASF dual-hosted git repository.
romseygeek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
The following commit(s) were added to refs/heads/master by this push:
new 7c350d2 LUCENE-7889: Allow grouping on Double/LongValuesSource (#1484)
7c350d2 is described below
commit 7c350d22c7070acf362fb4f477e6797f2711c910
Author: Alan Woodward <ro...@apache.org>
AuthorDate: Mon May 11 17:34:01 2020 +0100
LUCENE-7889: Allow grouping on Double/LongValuesSource (#1484)
The grouping module currently allows grouping on a SortedDocValues field, or on
a ValueSource. The latter groups only on exact values, and so will not perform well
on numeric-valued fields. This commit adds the ability to group by defined ranges
from a Long or DoubleValuesSource.
---
lucene/CHANGES.txt | 4 +-
.../apache/lucene/search/grouping/DoubleRange.java | 59 ++++++++++++
.../lucene/search/grouping/DoubleRangeFactory.java | 67 ++++++++++++++
.../search/grouping/DoubleRangeGroupSelector.java | 100 ++++++++++++++++++++
.../grouping/FirstPassGroupingCollector.java | 1 +
.../lucene/search/grouping/GroupSelector.java | 10 +-
.../lucene/search/grouping/GroupingSearch.java | 4 +
.../apache/lucene/search/grouping/LongRange.java | 58 ++++++++++++
.../lucene/search/grouping/LongRangeFactory.java | 67 ++++++++++++++
.../search/grouping/LongRangeGroupSelector.java | 101 +++++++++++++++++++++
.../grouping/SecondPassGroupingCollector.java | 1 +
.../lucene/search/grouping/TermGroupSelector.java | 4 +
.../search/grouping/ValueSourceGroupSelector.java | 6 +-
.../lucene/search/grouping/package-info.java | 23 ++---
.../grouping/DoubleRangeGroupSelectorTest.java | 59 ++++++++++++
.../grouping/LongRangeGroupSelectorTest.java | 59 ++++++++++++
.../search/grouping/TestDoubleRangeFactory.java | 39 ++++++++
.../search/grouping/TestLongRangeFactory.java | 39 ++++++++
18 files changed, 683 insertions(+), 18 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 484556c..a4bf772 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -159,7 +159,9 @@ API Changes
New Features
---------------------
-(No changes)
+
+* LUCENE-7889: Grouping by range based on values from DoubleValuesSource and LongValuesSource
+ (Alan Woodward)
Improvements
---------------------
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/DoubleRange.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/DoubleRange.java
new file mode 100644
index 0000000..df34f6b
--- /dev/null
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/DoubleRange.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import java.util.Objects;
+
+/**
+ * Represents a contiguous range of double values, with an inclusive minimum and
+ * exclusive maximum. The bounds are public, non-final fields, so an instance can be overwritten in place.
+ */
+public class DoubleRange {
+
+ /** The inclusive minimum value of this range */
+ public double min;
+ /** The exclusive maximum value of this range */
+ public double max;
+
+ /**
+ * Creates a new double range, running from {@code min} inclusive to {@code max} exclusive; the bounds are stored as given, without validation
+ */
+ public DoubleRange(double min, double max) {
+ this.min = min;
+ this.max = max;
+ }
+
+ @Override
+ public String toString() {
+ return "DoubleRange(" + min + ", " + max + ")";
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false; // requires the exact same class
+ DoubleRange that = (DoubleRange) o;
+ return Double.compare(that.min, min) == 0 &&
+ Double.compare(that.max, max) == 0;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(min, max); // hashes the same two fields that equals() compares
+ }
+}
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/DoubleRangeFactory.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/DoubleRangeFactory.java
new file mode 100644
index 0000000..3ea4606
--- /dev/null
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/DoubleRangeFactory.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+/**
+ * Groups double values into fixed-width ranges, with a single catch-all range at either end
+ */
+public class DoubleRangeFactory {
+
+ private final double min;
+ private final double width;
+ private final double max;
+
+ /**
+ * Creates a new DoubleRangeFactory
+ * @param min a minimum value; all doubles below this value are grouped into a single range
+ * @param width a standard width; all ranges between {@code min} and {@code max} are this wide,
+ * with the exception of the final range which may be up to this width. Ranges
+ * are inclusive at the lower end, and exclusive at the upper end.
+ * @param max a maximum value; all doubles at or above this value are grouped into a single range
+ */
+ public DoubleRangeFactory(double min, double width, double max) {
+ this.min = min;
+ this.width = width;
+ this.max = max;
+ }
+
+ /**
+ * Finds the DoubleRange that a value should be grouped into
+ * @param value the value to group
+ * @param reuse an existing DoubleRange object to overwrite and return, or {@code null} to allocate a new one
+ * @return {@code reuse} (or a new DoubleRange if it was {@code null}), set to the range for {@code value}
+ */
+ public DoubleRange getRange(double value, DoubleRange reuse) {
+ if (reuse == null) reuse = new DoubleRange(-Double.MAX_VALUE, Double.MAX_VALUE);
+ if (value < min) {
+ reuse.max = min;
+ reuse.min = -Double.MAX_VALUE; // not Double.MIN_VALUE: that is the smallest positive double
+ return reuse;
+ }
+ if (value >= max) {
+ reuse.min = max;
+ reuse.max = Double.MAX_VALUE;
+ return reuse;
+ }
+ double bucket = Math.floor((value - min) / width);
+ reuse.min = min + (bucket * width);
+ reuse.max = Math.min(reuse.min + width, max); // final range stops at max instead of overlapping the catch-all
+ return reuse;
+ }
+
+}
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/DoubleRangeGroupSelector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/DoubleRangeGroupSelector.java
new file mode 100644
index 0000000..4a6a65a
--- /dev/null
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/DoubleRangeGroupSelector.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.DoubleValues;
+import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.Scorable;
+
+/**
+ * A GroupSelector implementation that groups documents by the DoubleRange that their double value falls into
+ */
+public class DoubleRangeGroupSelector extends GroupSelector<DoubleRange> {
+
+ private final DoubleValuesSource source;
+ private final DoubleRangeFactory rangeFactory;
+
+ private Set<DoubleRange> inSecondPass; // null until setGroups() is called; afterwards the only ranges accepted
+ private boolean includeEmpty = true; // whether documents without a value are accepted
+ private boolean positioned; // whether the document last passed to advanceTo() has a value
+ private DoubleRange current; // scratch range, handed back to rangeFactory for reuse on each document
+
+ private LeafReaderContext context;
+ private DoubleValues values;
+
+ /**
+ * Creates a new DoubleRangeGroupSelector
+ * @param source a DoubleValuesSource to retrieve double values per document
+ * @param rangeFactory a DoubleRangeFactory that defines how to group the double values into range buckets
+ */
+ public DoubleRangeGroupSelector(DoubleValuesSource source, DoubleRangeFactory rangeFactory) {
+ this.source = source;
+ this.rangeFactory = rangeFactory;
+ }
+
+ @Override
+ public void setNextReader(LeafReaderContext readerContext) throws IOException {
+ this.context = readerContext; // only remembered here; the values are created in setScorer()
+ }
+
+ @Override
+ public void setScorer(Scorable scorer) throws IOException {
+ this.values = source.getValues(context, DoubleValuesSource.fromScorer(scorer)); // uses the context stored by setNextReader()
+ }
+
+ @Override
+ public State advanceTo(int doc) throws IOException {
+ positioned = values.advanceExact(doc);
+ if (positioned == false) {
+ return includeEmpty ? State.ACCEPT : State.SKIP; // no value for this document
+ }
+ this.current = rangeFactory.getRange(values.doubleValue(), this.current);
+ if (inSecondPass == null) {
+ return State.ACCEPT; // no restriction has been set, so every range is accepted
+ }
+ return inSecondPass.contains(this.current) ? State.ACCEPT : State.SKIP;
+ }
+
+ @Override
+ public DoubleRange currentValue() throws IOException {
+ return positioned ? this.current : null; // the reused scratch object, or null when the document has no value
+ }
+
+ @Override
+ public DoubleRange copyValue() throws IOException {
+ return positioned ? new DoubleRange(this.current.min, this.current.max) : null; // fresh object, unaffected by later reuse
+ }
+
+ @Override
+ public void setGroups(Collection<SearchGroup<DoubleRange>> searchGroups) {
+ inSecondPass = new HashSet<>();
+ includeEmpty = false; // documents without a value are accepted only if a null group is listed below
+ for (SearchGroup<DoubleRange> group : searchGroups) {
+ if (group.groupValue == null)
+ includeEmpty = true;
+ else
+ inSecondPass.add(group.groupValue);
+ }
+ }
+}
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java
index 6a745b8..f5b0597 100644
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java
@@ -151,6 +151,7 @@ public class FirstPassGroupingCollector<T> extends SimpleCollector {
@Override
public void setScorer(Scorable scorer) throws IOException {
+ groupSelector.setScorer(scorer);
for (LeafFieldComparator comparator : leafComparators) {
comparator.setScorer(scorer);
}
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupSelector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupSelector.java
index dbb0932..92962a4 100644
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupSelector.java
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupSelector.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.Scorable;
/**
* Defines a group, for use by grouping collectors
@@ -44,6 +45,11 @@ public abstract class GroupSelector<T> {
public abstract void setNextReader(LeafReaderContext readerContext) throws IOException;
/**
+ * Set the current Scorer
+ */
+ public abstract void setScorer(Scorable scorer) throws IOException;
+
+ /**
* Advance the GroupSelector's iterator to the given document
*/
public abstract State advanceTo(int doc) throws IOException;
@@ -53,12 +59,12 @@ public abstract class GroupSelector<T> {
*
* N.B. this object may be reused, for a persistent version use {@link #copyValue()}
*/
- public abstract T currentValue();
+ public abstract T currentValue() throws IOException;
/**
* @return a copy of the group value of the current document
*/
- public abstract T copyValue();
+ public abstract T copyValue() throws IOException;
/**
* Set a restriction on the group values returned by this selector
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java
index 3d0da81..55fda8b 100644
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/GroupingSearch.java
@@ -71,6 +71,10 @@ public class GroupingSearch {
this(new TermGroupSelector(groupField), null);
}
+ /**
+ * Constructs a <code>GroupingSearch</code> instance that groups documents using a {@link GroupSelector}
+ * @param groupSelector a {@link GroupSelector} that defines groups for this GroupingSearch
+ */
public GroupingSearch(GroupSelector<?> groupSelector) {
this(groupSelector, null);
}
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/LongRange.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/LongRange.java
new file mode 100644
index 0000000..7b6c845
--- /dev/null
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/LongRange.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import java.util.Objects;
+
+/**
+ * Represents a contiguous range of long values, with an inclusive minimum and
+ * exclusive maximum. The bounds are public, non-final fields, so an instance can be overwritten in place.
+ */
+public class LongRange {
+
+ /** The inclusive minimum value of this range */
+ public long min;
+ /** The exclusive maximum value of this range */
+ public long max;
+
+ /**
+ * Creates a new long range, running from {@code min} inclusive to {@code max} exclusive; the bounds are stored as given, without validation
+ */
+ public LongRange(long min, long max) {
+ this.min = min;
+ this.max = max;
+ }
+
+ @Override
+ public String toString() {
+ return "LongRange(" + min + ", " + max + ")";
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false; // requires the exact same class
+ LongRange that = (LongRange) o;
+ return that.min == min && that.max == max;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(min, max); // hashes the same two fields that equals() compares
+ }
+}
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/LongRangeFactory.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/LongRangeFactory.java
new file mode 100644
index 0000000..be66647
--- /dev/null
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/LongRangeFactory.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+/**
+ * Groups long values into fixed-width ranges, with a single catch-all range at either end
+ */
+public class LongRangeFactory {
+
+ private final long min;
+ private final long width;
+ private final long max;
+
+ /**
+ * Creates a new LongRangeFactory
+ * @param min a minimum value; all longs below this value are grouped into a single range
+ * @param width a standard width; all ranges between {@code min} and {@code max} are this wide,
+ * with the exception of the final range which may be up to this width. Ranges
+ * are inclusive at the lower end, and exclusive at the upper end.
+ * @param max a maximum value; all longs at or above this value are grouped into a single range
+ */
+ public LongRangeFactory(long min, long width, long max) {
+ this.min = min;
+ this.width = width;
+ this.max = max;
+ }
+
+ /**
+ * Finds the LongRange that a value should be grouped into
+ * @param value the value to group
+ * @param reuse an existing LongRange object to overwrite and return, or {@code null} to allocate a new one
+ * @return {@code reuse} (or a new LongRange if it was {@code null}), set to the range for {@code value}
+ */
+ public LongRange getRange(long value, LongRange reuse) {
+ if (reuse == null) reuse = new LongRange(Long.MIN_VALUE, Long.MAX_VALUE);
+ if (value < min) {
+ reuse.max = min;
+ reuse.min = Long.MIN_VALUE;
+ return reuse;
+ }
+ if (value >= max) {
+ reuse.min = max;
+ reuse.max = Long.MAX_VALUE;
+ return reuse;
+ }
+ long bucket = (value - min) / width;
+ reuse.min = min + (bucket * width);
+ reuse.max = Math.min(reuse.min + width, max); // final range stops at max instead of overlapping the catch-all
+ return reuse;
+ }
+
+}
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/LongRangeGroupSelector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/LongRangeGroupSelector.java
new file mode 100644
index 0000000..7dd0c23
--- /dev/null
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/LongRangeGroupSelector.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.LongValues;
+import org.apache.lucene.search.LongValuesSource;
+import org.apache.lucene.search.Scorable;
+
+/**
+ * A GroupSelector implementation that groups documents by the LongRange that their long value falls into
+ */
+public class LongRangeGroupSelector extends GroupSelector<LongRange> {
+
+ private final LongValuesSource source;
+ private final LongRangeFactory rangeFactory;
+
+ private Set<LongRange> inSecondPass; // null until setGroups() is called; afterwards the only ranges accepted
+ private boolean includeEmpty = true; // whether documents without a value are accepted
+ private boolean positioned; // whether the document last passed to advanceTo() has a value
+ private LongRange current; // scratch range, handed back to rangeFactory for reuse on each document
+
+ private LeafReaderContext context;
+ private LongValues values;
+
+ /**
+ * Creates a new LongRangeGroupSelector
+ * @param source a LongValuesSource to retrieve long values per document
+ * @param rangeFactory a LongRangeFactory that defines how to group the long values into range buckets
+ */
+ public LongRangeGroupSelector(LongValuesSource source, LongRangeFactory rangeFactory) {
+ this.source = source;
+ this.rangeFactory = rangeFactory;
+ }
+
+ @Override
+ public void setNextReader(LeafReaderContext readerContext) throws IOException {
+ this.context = readerContext; // only remembered here; the values are created in setScorer()
+ }
+
+ @Override
+ public void setScorer(Scorable scorer) throws IOException {
+ this.values = source.getValues(context, DoubleValuesSource.fromScorer(scorer)); // uses the context stored by setNextReader()
+ }
+
+ @Override
+ public State advanceTo(int doc) throws IOException {
+ positioned = values.advanceExact(doc);
+ if (positioned == false) {
+ return includeEmpty ? State.ACCEPT : State.SKIP; // no value for this document
+ }
+ this.current = rangeFactory.getRange(values.longValue(), this.current);
+ if (inSecondPass == null) {
+ return State.ACCEPT; // no restriction has been set, so every range is accepted
+ }
+ return inSecondPass.contains(this.current) ? State.ACCEPT : State.SKIP;
+ }
+
+ @Override
+ public LongRange currentValue() throws IOException {
+ return positioned ? this.current : null; // the reused scratch object, or null when the document has no value
+ }
+
+ @Override
+ public LongRange copyValue() throws IOException {
+ return positioned ? new LongRange(this.current.min, this.current.max) : null; // fresh object, unaffected by later reuse
+ }
+
+ @Override
+ public void setGroups(Collection<SearchGroup<LongRange>> searchGroups) {
+ inSecondPass = new HashSet<>();
+ includeEmpty = false; // documents without a value are accepted only if a null group is listed below
+ for (SearchGroup<LongRange> group : searchGroups) {
+ if (group.groupValue == null)
+ includeEmpty = true;
+ else
+ inSecondPass.add(group.groupValue);
+ }
+ }
+}
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java
index 0d5fc9d..dc7d0aa 100644
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java
@@ -78,6 +78,7 @@ public class SecondPassGroupingCollector<T> extends SimpleCollector {
@Override
public void setScorer(Scorable scorer) throws IOException {
+ groupSelector.setScorer(scorer);
groupReducer.setScorer(scorer);
}
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupSelector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupSelector.java
index 5b8f77c..65213b2 100644
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupSelector.java
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/TermGroupSelector.java
@@ -25,6 +25,7 @@ import java.util.Map;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.search.Scorable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
@@ -65,6 +66,9 @@ public class TermGroupSelector extends GroupSelector<BytesRef> {
}
@Override
+ public void setScorer(Scorable scorer) throws IOException { }
+
+ @Override
public State advanceTo(int doc) throws IOException {
if (this.docValues.advanceExact(doc) == false) {
groupId = -1;
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/ValueSourceGroupSelector.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/ValueSourceGroupSelector.java
index 2490160..8ff945a 100644
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/ValueSourceGroupSelector.java
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/ValueSourceGroupSelector.java
@@ -26,6 +26,7 @@ import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.search.Scorable;
import org.apache.lucene.util.mutable.MutableValue;
/**
@@ -57,6 +58,9 @@ public class ValueSourceGroupSelector extends GroupSelector<MutableValue> {
}
@Override
+ public void setScorer(Scorable scorer) throws IOException { }
+
+ @Override
public State advanceTo(int doc) throws IOException {
this.filler.fillValue(doc);
if (secondPassGroups != null) {
@@ -67,7 +71,7 @@ public class ValueSourceGroupSelector extends GroupSelector<MutableValue> {
}
@Override
- public MutableValue currentValue() {
+ public MutableValue currentValue() throws IOException {
return filler.getValue();
}
diff --git a/lucene/grouping/src/java/org/apache/lucene/search/grouping/package-info.java b/lucene/grouping/src/java/org/apache/lucene/search/grouping/package-info.java
index 7e3745e..36d94a5 100644
--- a/lucene/grouping/src/java/org/apache/lucene/search/grouping/package-info.java
+++ b/lucene/grouping/src/java/org/apache/lucene/search/grouping/package-info.java
@@ -28,11 +28,9 @@
* <p>Grouping requires a number of inputs:</p>
*
* <ul>
- * <li><code>groupField</code>: this is the field used for grouping.
- * For example, if you use the <code>author</code> field then each
- * group has all books by the same author. Documents that don't
- * have this field are grouped under a single group with
- * a <code>null</code> group value.
+ * <li><code>groupSelector</code>: this defines how groups are created
+ * from values per-document. The grouping module ships with
+ * selectors for grouping by term, and by long and double ranges.
*
* <li><code>groupSort</code>: how the groups are sorted. For sorting
* purposes, each group is "represented" by the highest-sorted
@@ -80,6 +78,10 @@
* the value of a {@link org.apache.lucene.index.SortedDocValues} field</li>
* <li>{@link org.apache.lucene.search.grouping.ValueSourceGroupSelector} groups based on
* the value of a {@link org.apache.lucene.queries.function.ValueSource}</li>
+ * <li>{@link org.apache.lucene.search.grouping.DoubleRangeGroupSelector} groups based on
+ * the value of a {@link org.apache.lucene.search.DoubleValuesSource}</li>
+ * <li>{@link org.apache.lucene.search.grouping.LongRangeGroupSelector} groups based on
+ * the value of a {@link org.apache.lucene.search.LongValuesSource}</li>
* </ul>
*
* <p>Known limitations:</p>
@@ -137,17 +139,10 @@
* writer.addDocuments(oneGroup);
* </pre>
*
- * Then, at search time, do this up front:
+ * Then, at search time:
*
* <pre class="prettyprint">
- * // Set this once in your app & save away for reusing across all queries:
- * Filter groupEndDocs = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupEnd", "x"))));
- * </pre>
- *
- * Finally, do this per search:
- *
- * <pre class="prettyprint">
- * // Per search:
+ * Query groupEndDocs = new TermQuery(new Term("groupEnd", "x"));
* BlockGroupingCollector c = new BlockGroupingCollector(groupSort, groupOffset+topNGroups, needsScores, groupEndDocs);
* s.search(new TermQuery(new Term("content", searchTerm)), c);
* TopGroups groupsResult = c.getTopGroups(withinGroupSort, groupOffset, docOffset, docOffset+docsPerGroup, fillFields);
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/DoubleRangeGroupSelectorTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/DoubleRangeGroupSelectorTest.java
new file mode 100644
index 0000000..c938589
--- /dev/null
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/DoubleRangeGroupSelectorTest.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
+import org.apache.lucene.search.DoubleValuesSource;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+
+public class DoubleRangeGroupSelectorTest extends BaseGroupSelectorTestCase<DoubleRange> {
+
+ @Override
+ protected void addGroupField(Document document, int id) {
+ if (rarely()) {
+ return; // occasionally add no field at all, so that some documents have no value to group on
+ }
+ // values in [0, 1000); the factory in getGroupSelector() uses min 100, width 100 and max 900
+ double value = random().nextDouble() * 1000;
+ document.add(new DoublePoint("double", value)); // point field, searched by filterQuery()
+ document.add(new NumericDocValuesField("double", Double.doubleToLongBits(value))); // doc values hold the double's raw long bits
+ }
+
+ @Override
+ protected GroupSelector<DoubleRange> getGroupSelector() {
+ return new DoubleRangeGroupSelector(DoubleValuesSource.fromDoubleField("double"),
+ new DoubleRangeFactory(100, 100, 900));
+ }
+
+ @Override
+ protected Query filterQuery(DoubleRange groupValue) {
+ if (groupValue == null) {
+ return new BooleanQuery.Builder()
+ .add(new MatchAllDocsQuery(), BooleanClause.Occur.FILTER)
+ .add(new DocValuesFieldExistsQuery("double"), BooleanClause.Occur.MUST_NOT)
+ .build();
+ }
+ return DoublePoint.newRangeQuery("double", groupValue.min, Math.nextDown(groupValue.max)); // nextDown because the range's max is exclusive
+ }
+}
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/LongRangeGroupSelectorTest.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/LongRangeGroupSelectorTest.java
new file mode 100644
index 0000000..6075f70
--- /dev/null
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/LongRangeGroupSelectorTest.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.LongPoint;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
+import org.apache.lucene.search.LongValuesSource;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.Query;
+
+public class LongRangeGroupSelectorTest extends BaseGroupSelectorTestCase<LongRange> {
+
+ @Override
+ protected void addGroupField(Document document, int id) {
+ if (rarely()) {
+ return; // missing value
+ }
+ // numbers between 0 and 1000, groups are 100 wide from 100 to 900
+ long value = random().nextInt(1000);
+ document.add(new LongPoint("long", value));
+ document.add(new NumericDocValuesField("long", value));
+ }
+
+ @Override
+ protected GroupSelector<LongRange> getGroupSelector() {
+ return new LongRangeGroupSelector(LongValuesSource.fromLongField("long"),
+ new LongRangeFactory(100, 100, 900));
+ }
+
+ @Override
+ protected Query filterQuery(LongRange groupValue) {
+ if (groupValue == null) {
+ return new BooleanQuery.Builder()
+ .add(new MatchAllDocsQuery(), BooleanClause.Occur.FILTER)
+ .add(new DocValuesFieldExistsQuery("long"), BooleanClause.Occur.MUST_NOT)
+ .build();
+ }
+ return LongPoint.newRangeQuery("long", groupValue.min, groupValue.max - 1);
+ }
+}
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestDoubleRangeFactory.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestDoubleRangeFactory.java
new file mode 100644
index 0000000..d6e05c3
--- /dev/null
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestDoubleRangeFactory.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestDoubleRangeFactory extends LuceneTestCase {
+
+ public void test() {
+
+ DoubleRangeFactory factory = new DoubleRangeFactory(10, 10, 50);
+ DoubleRange scratch = new DoubleRange(0, 0);
+
+ assertEquals(new DoubleRange(Double.MIN_VALUE, 10), factory.getRange(4, scratch));
+ assertEquals(new DoubleRange(10, 20), factory.getRange(10, scratch));
+ assertEquals(new DoubleRange(20, 30), factory.getRange(20, scratch));
+ assertEquals(new DoubleRange(10, 20), factory.getRange(15, scratch));
+ assertEquals(new DoubleRange(30, 40), factory.getRange(35, scratch));
+ assertEquals(new DoubleRange(50, Double.MAX_VALUE), factory.getRange(50, scratch));
+ assertEquals(new DoubleRange(50, Double.MAX_VALUE), factory.getRange(500, scratch));
+
+ }
+
+}
diff --git a/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestLongRangeFactory.java b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestLongRangeFactory.java
new file mode 100644
index 0000000..0677631
--- /dev/null
+++ b/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestLongRangeFactory.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestLongRangeFactory extends LuceneTestCase {
+
+ public void test() {
+
+ LongRangeFactory factory = new LongRangeFactory(10, 10, 50);
+ LongRange scratch = new LongRange(0, 0);
+
+ assertEquals(new LongRange(Long.MIN_VALUE, 10), factory.getRange(4, scratch));
+ assertEquals(new LongRange(10, 20), factory.getRange(10, scratch));
+ assertEquals(new LongRange(20, 30), factory.getRange(20, scratch));
+ assertEquals(new LongRange(10, 20), factory.getRange(15, scratch));
+ assertEquals(new LongRange(30, 40), factory.getRange(35, scratch));
+ assertEquals(new LongRange(50, Long.MAX_VALUE), factory.getRange(50, scratch));
+ assertEquals(new LongRange(50, Long.MAX_VALUE), factory.getRange(500, scratch));
+
+ }
+
+}