You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2016/08/23 17:00:06 UTC
lucene-solr:branch_6x: SOLR-9432: JSON Facet refactoring to support
refinement
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x a3f58fc54 -> 5d29fd87f
SOLR-9432: JSON Facet refactoring to support refinement
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5d29fd87
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5d29fd87
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5d29fd87
Branch: refs/heads/branch_6x
Commit: 5d29fd87f14fe4eb2e18a6f631f386df5f6d31f5
Parents: a3f58fc
Author: yonik <yo...@apache.org>
Authored: Tue Aug 23 12:54:49 2016 -0400
Committer: yonik <yo...@apache.org>
Committed: Tue Aug 23 12:55:32 2016 -0400
----------------------------------------------------------------------
.../apache/solr/search/facet/FacetField.java | 47 +-
.../solr/search/facet/FacetFieldMerger.java | 213 ++++++++
.../apache/solr/search/facet/FacetMerger.java | 126 ++++-
.../apache/solr/search/facet/FacetModule.java | 544 +++++++------------
.../apache/solr/search/facet/FacetRange.java | 8 +-
.../solr/search/facet/FacetRangeMerger.java | 123 +++++
.../apache/solr/search/facet/FacetRequest.java | 69 ++-
.../search/facet/FacetRequestSortedMerger.java | 234 ++++++++
.../org/apache/solr/search/facet/HLLAgg.java | 2 +-
.../apache/solr/search/facet/PercentileAgg.java | 2 +-
.../org/apache/solr/search/facet/UniqueAgg.java | 2 +-
.../src/java/org/apache/solr/JSONTestUtil.java | 13 +
12 files changed, 994 insertions(+), 389 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d29fd87/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
index c06e182..50c31d4 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
@@ -23,38 +23,45 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
+// Base class for any type of facet request that generates a variable number of buckets
+// and can sort by those generated buckets.
+abstract class FacetRequestSorted extends FacetRequest {
+ long offset; // number of leading buckets to skip; no default is assigned in this class
+ long limit; // bucket limit; returnsPartial() is true only when this is positive
+ long mincount; // minimum bucket count; no default is assigned in this class
+ String sortVariable;
+ SortDirection sortDirection;
+ RefineMethod refine; // null, NONE, or SIMPLE
+
+ @Override
+ public RefineMethod getRefineMethod() {
+ return refine; // may be null (see the field comment)
+ }
-public class FacetField extends FacetRequest {
+ @Override
+ public boolean returnsPartial() {
+ return limit > 0; // a positive limit is reported as "results may be partial"
+ }
+
+}
+
+
+public class FacetField extends FacetRequestSorted {
String field;
- long offset;
- long limit = 10;
- long mincount = 1;
boolean missing;
boolean allBuckets; // show cumulative stats across all buckets (this can be different than non-bucketed stats across all docs because of multi-valued docs)
boolean numBuckets;
String prefix;
- String sortVariable;
- SortDirection sortDirection;
FacetMethod method;
int cacheDf; // 0 means "default", -1 means "never cache"
// experimental - force perSeg collection when using dv method, currently for testing purposes only.
Boolean perSeg;
- // TODO: put this somewhere more generic?
- public enum SortDirection {
- asc(-1) ,
- desc(1);
-
- private final int multiplier;
- private SortDirection(int multiplier) {
- this.multiplier = multiplier;
- }
-
- // asc==-1, desc==1
- public int getMultiplier() {
- return multiplier;
- }
+ {
+ // defaults for FacetRequestSorted
+ mincount = 1;
+ limit = 10;
}
public enum FacetMethod {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d29fd87/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java
new file mode 100644
index 0000000..8a26f51
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldMerger.java
@@ -0,0 +1,213 @@
+package org.apache.solr.search.facet;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.solr.common.util.SimpleOrderedMap;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// TODO: refactor more out to base class
+public class FacetFieldMerger extends FacetRequestSortedMerger<FacetField> {
+ FacetBucket missingBucket;
+ FacetBucket allBuckets;
+ FacetMerger numBuckets;
+ int[] numReturnedPerShard;
+
+ // LinkedHashMap<Object,FacetBucket> buckets = new LinkedHashMap<>();
+ // List<FacetBucket> sortedBuckets;
+ int numReturnedBuckets; // the number of buckets in the bucket lists returned from all of the shards
+
+
+ public FacetFieldMerger(FacetField freq) {
+ super(freq);
+ }
+
+  @Override
+  public void merge(Object facetResult, Context mcontext) {
+    // The per-shard counter array is allocated lazily, sized from the first context seen.
+    if (null == numReturnedPerShard) numReturnedPerShard = new int[mcontext.numShards];
+    SimpleOrderedMap shardFacet = (SimpleOrderedMap) facetResult;
+    merge(shardFacet, mcontext);
+  }
+
+  /**
+   * Folds one shard's response for this field facet into the merged state.
+   * The optional "missing" and "allBuckets" buckets, the regular bucket list and the
+   * optional "numBuckets" info are processed in that order; the order is kept because
+   * new buckets draw their numbers from the shared context as they are created.
+   * @param facetResult this facet's section of the shard response
+   * @param mcontext merge context identifying the shard being merged
+   */
+  protected void merge(SimpleOrderedMap facetResult, Context mcontext) {
+    // Merge this shard's "missing" bucket, creating the accumulator on first use.
+    Object shardMissing = freq.missing ? facetResult.get("missing") : null;
+    if (shardMissing != null) {
+      if (missingBucket == null) missingBucket = newBucket(null, mcontext);
+      missingBucket.mergeBucket((SimpleOrderedMap) shardMissing, mcontext);
+    }
+
+    // Same treatment for the "allBuckets" bucket.
+    Object shardAll = freq.allBuckets ? facetResult.get("allBuckets") : null;
+    if (shardAll != null) {
+      if (allBuckets == null) allBuckets = newBucket(null, mcontext);
+      allBuckets.mergeBucket((SimpleOrderedMap) shardAll, mcontext);
+    }
+
+    // Record how many buckets this shard returned, then merge them.
+    List<SimpleOrderedMap> shardBuckets = (List<SimpleOrderedMap>) facetResult.get("buckets");
+    int returned = shardBuckets.size();
+    numReturnedPerShard[mcontext.shardNum] = returned;
+    numReturnedBuckets += returned;
+    mergeBucketList(shardBuckets, mcontext);
+
+    // Merge the shard's "numBuckets" info, creating the merger on first use.
+    Object shardNumBuckets = freq.numBuckets ? facetResult.get("numBuckets") : null;
+    if (shardNumBuckets != null) {
+      if (numBuckets == null) numBuckets = new FacetNumBucketsMerger();
+      numBuckets.merge(shardNumBuckets, mcontext);
+    }
+  }
+
+
+
+
+  /**
+   * Builds the merged response for this field facet once every shard has been merged.
+   * The result holds "numBuckets" (only if a shard reported it), then "buckets", then the
+   * optional "missing" and "allBuckets" buckets.
+   * <p>Offset and limit are applied only after buckets below mincount have been filtered out.
+   * They are tracked as longs: narrowing them to int would turn a limit above
+   * Integer.MAX_VALUE into a negative number and silently return no buckets.
+   * @return a SimpleOrderedMap holding the merged facet
+   */
+  @Override
+  public Object getMergedResult() {
+    SimpleOrderedMap result = new SimpleOrderedMap();
+
+    if (numBuckets != null) {
+      // Buckets below mincount after merging are not returned, so subtract them from the estimate.
+      int removed = 0;
+      if (freq.mincount > 1) {
+        for (FacetBucket bucket : buckets.values()) {
+          if (bucket.count < freq.mincount) removed++;
+        }
+      }
+      result.add("numBuckets", ((Number)numBuckets.getMergedResult()).longValue() - removed);
+
+      // TODO: we can further increase this estimate.
+      // If not sorting by count, use a simple ratio to scale
+      // If sorting by count desc, then add up the highest_possible_missing_count from each shard
+    }
+
+    sortBuckets();
+
+    long off = freq.offset;
+    long lim = freq.limit >= 0 ? freq.limit : Long.MAX_VALUE;
+    // Size the list for the most buckets that can be returned; this is only a capacity
+    // hint because the mincount filter below may drop more.
+    long available = Math.max(0L, sortedBuckets.size() - Math.max(0L, off));
+    List<SimpleOrderedMap> resultBuckets = new ArrayList<>((int) Math.min(available, lim));
+
+    // TODO: change effective offsets + limits at shards...
+    for (FacetBucket bucket : sortedBuckets) {
+      if (bucket.getCount() < freq.mincount) {
+        continue; // filtered buckets consume neither offset nor limit
+      }
+
+      if (off > 0) {
+        --off;
+        continue;
+      }
+
+      if (resultBuckets.size() >= lim) {
+        break;
+      }
+
+      resultBuckets.add( bucket.getMergedBucket() );
+    }
+
+    result.add("buckets", resultBuckets);
+    if (missingBucket != null) {
+      result.add("missing", missingBucket.getMergedBucket());
+    }
+    if (allBuckets != null) {
+      result.add("allBuckets", allBuckets.getMergedBucket());
+    }
+
+    return result;
+  }
+
+
+ @Override
+ public void finish(Context mcontext) {
+ // No-op for now: this body holds only the open design questions listed below.
+ // TODO: check refine of subs?
+ // TODO: call subs each time with a shard/shardnum that is missing a bucket at this level?
+ // or pass a bit vector of shards w/ value???
+ // build up data structure and only then call the context (or whatever) to do the refinement?
+ // basically, only do at the top-level facet?
+ }
+
+
+
+ private class FacetNumBucketsMerger extends FacetMerger { // merges the per-shard "numBuckets" info into a single number
+ long sumBuckets; // running total of every shard's numBuckets; written in merge() but not read in this class
+ long shardsMissingSum; // numBuckets summed over shards that sent no "vals" list
+ long shardsTruncatedSum; // buckets a shard reported beyond what the merged value set accounts for
+ Set<Object> values; // union of the "vals" lists seen so far; stays null until a shard sends one
+
+ @Override
+ public void merge(Object facetResult, Context mcontext) {
+ SimpleOrderedMap map = (SimpleOrderedMap)facetResult;
+ long numBuckets = ((Number)map.get("numBuckets")).longValue();
+ sumBuckets += numBuckets;
+
+ List vals = (List)map.get("vals");
+ if (vals != null) {
+ if (values == null) {
+ values = new HashSet<>(vals.size()*4);
+ }
+ values.addAll(vals);
+ if (numBuckets > values.size()) { // NOTE(review): compares against the merged set, not vals.size() - confirm this is intended
+ shardsTruncatedSum += numBuckets - values.size();
+ }
+ } else {
+ shardsMissingSum += numBuckets;
+ }
+ }
+
+ @Override
+ public void finish(Context mcontext) {
+ // nothing to do
+ }
+
+ @Override
+ public Object getMergedResult() {
+ long exactCount = values == null ? 0 : values.size(); // distinct values actually listed by the shards
+ return exactCount + shardsMissingSum + shardsTruncatedSum; // long sum, autoboxed to Long
+ // TODO: reduce count by (at least) number of buckets that fail to hit mincount (after merging)
+ // that should make things match for most of the small tests at least
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d29fd87/solr/core/src/java/org/apache/solr/search/facet/FacetMerger.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetMerger.java
index a8573c0..9499d2c 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetMerger.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetMerger.java
@@ -16,17 +16,131 @@
*/
package org.apache.solr.search.facet;
-//
-// The FacetMerger code is in the prototype stage, and this is the reason that
-// many implementations are all in this file. They can be moved to separate
-// files after the interfaces are locked down more.
-//
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.IdentityHashMap;
+import java.util.Map;
+
+import static org.apache.solr.search.facet.FacetRequest.RefineMethod.SIMPLE;
+
+
public abstract class FacetMerger {
public abstract void merge(Object facetResult, Context mcontext);
+
+ // FIXME
+ // public abstract Map<String,Object> getRefinement(Context mcontext);
+ public Map<String,Object> getRefinement(Context mcontext) { // default implementation: no refinement (null)
+ return null;
+ }
+ public abstract void finish(Context mcontext);
public abstract Object getMergedResult();
+ // This class lets mergers know overall context such as what shard is being merged
+ // and what buckets have been seen by what shard.
public static class Context {
// FacetComponentState state; // todo: is this needed?
- Object root;
+ final int numShards;
+ private final BitSet sawShard = new BitSet(); // [bucket0_shard0, bucket0_shard1, bucket0_shard2, bucket1_shard0, bucket1_shard1, bucket1_shard2]
+ private Map<String,Integer> shardmap = new HashMap<>(); // shard name -> shard number assigned by newShard()
+
+ public Context(int numShards) {
+ this.numShards = numShards;
+ }
+
+ Object root; // per-shard response
+ int maxBucket; // the current max bucket across all bucket types... incremented as we encounter more
+ int shardNum = -1; // TODO: keep same mapping across multiple phases...
+ boolean bucketWasMissing;
+
+ public void newShard(String shard) { // registers a shard under the next shard number and makes it the current shard
+ Integer prev = shardmap.put(shard, ++shardNum);
+ assert prev == null;
+ this.bucketWasMissing = false;
+ }
+
+ public void setShard(String shard) { // makes a shard previously registered via newShard() the current shard again
+ this.shardNum = java.util.Objects.requireNonNull(shardmap.get(shard), "unknown shard: " + shard); // named failure instead of a bare unboxing NPE
+ }
+
+ public int getNewBucketNumber() { // hands out consecutive bucket numbers starting at 0
+ return maxBucket++;
+ }
+
+ public void setShardFlag(int bucketNum) { // sets the flag bit for (bucketNum, current shard)
+ // rely on normal bitset expansion (uses a doubling strategy)
+ sawShard.set( bucketNum * numShards + shardNum );
+ }
+
+ public boolean getShardFlag(int bucketNum) { // reads the flag bit for (bucketNum, current shard)
+ return sawShard.get( bucketNum * numShards + shardNum );
+ }
+
+ public boolean bucketWasMissing() {
+ return bucketWasMissing;
+ }
+
+ public boolean setBucketWasMissing(boolean newVal) { // returns the previous value so a caller can restore it
+ boolean oldVal = bucketWasMissing();
+ bucketWasMissing = newVal;
+ return oldVal;
+ }
+
+ private Map<FacetRequest, Collection<String>> refineSubMap = new IdentityHashMap<>(4); // per-request cache for getSubsWithRefinement
+ public Collection<String> getSubsWithRefinement(FacetRequest freq) { // keys of sub-facets that use SIMPLE refinement or have such a descendant
+ if (freq.getSubFacets().isEmpty()) return Collections.emptyList();
+ Collection<String> subs = refineSubMap.get(freq);
+ if (subs != null) return subs;
+
+ for (Map.Entry<String,FacetRequest> entry : freq.subFacets.entrySet()) {
+ Collection<String> childSubs = getSubsWithRefinement(entry.getValue());
+ if (childSubs.size() > 0 || entry.getValue().getRefineMethod() == SIMPLE) {
+ if (subs == null) {
+ subs = new ArrayList<>(freq.getSubFacets().size());
+ }
+ subs.add(entry.getKey());
+ }
+ }
+
+ if (subs == null) {
+ subs = Collections.emptyList();
+ }
+ refineSubMap.put(freq, subs);
+ return subs;
+ }
+
+
+ private Map<FacetRequest, Collection<String>> partialSubsMap = new IdentityHashMap<>(4); // per-request cache for getSubsWithPartial
+ public Collection<String> getSubsWithPartial(FacetRequest freq) { // keys of sub-facets that return partial results or have such a descendant
+ if (freq.getSubFacets().isEmpty()) return Collections.emptyList();
+ Collection<String> subs = partialSubsMap.get(freq);
+ if (subs != null) return subs;
+
+ // subs is still null here; the list is only allocated once a matching sub-facet is found
+ for (Map.Entry<String,FacetRequest> entry : freq.subFacets.entrySet()) {
+ Collection<String> childSubs = getSubsWithPartial(entry.getValue());
+ if (childSubs.size() > 0 || entry.getValue().returnsPartial()) {
+ if (subs == null) {
+ subs = new ArrayList<>(freq.getSubFacets().size());
+ }
+ subs.add(entry.getKey());
+ }
+ }
+
+ if (subs == null) {
+ subs = Collections.emptyList();
+ }
+ partialSubsMap.put(freq, subs);
+ return subs;
+ }
+
+
}
+
+
+
}
+
+
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d29fd87/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java
index ad324eb..b2831a3 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetModule.java
@@ -18,6 +18,7 @@ package org.apache.solr.search.facet;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
@@ -29,7 +30,9 @@ import java.util.Set;
import org.apache.solr.client.solrj.SolrResponse;
import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
@@ -41,6 +44,7 @@ import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.search.QueryContext;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.util.RTimer;
+import org.noggit.JSONUtil;
public class FacetModule extends SearchComponent {
@@ -55,6 +59,7 @@ public class FacetModule extends SearchComponent {
// Internal information passed down from the top level to shards for distributed faceting.
private final static String FACET_STATE = "_facet_";
+ private final static String FACET_REFINE = "refine";
public FacetComponentState getFacetComponentState(ResponseBuilder rb) {
@@ -149,12 +154,90 @@ public class FacetModule extends SearchComponent {
}
+  private void clearFaceting(List<ShardRequest> outgoing) {
+    for (ShardRequest request : outgoing) { // only requests flagged for JSON facet refinement keep their faceting params
+      if ((request.purpose & PURPOSE_REFINE_JSON_FACETS) == 0) {
+        request.params.remove("json.facet"); // space saving only; the presence of FACET_STATE is what controls the faceting
+        request.params.remove(FACET_STATE);
+      }
+    }
+  }
+
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
FacetComponentState facetState = getFacetComponentState(rb);
if (facetState == null) return ResponseBuilder.STAGE_DONE;
+ if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) { // refinement requests are only issued during the get-fields stage
+ return ResponseBuilder.STAGE_DONE;
+ }
+
+ // Check if there are any refinements possible
+ if (facetState.mcontext.getSubsWithRefinement(facetState.facetRequest).isEmpty()) {
+ clearFaceting(rb.outgoing);
+ return ResponseBuilder.STAGE_DONE;
+ }
+
+ // Overlap facet refinement requests (those shards that we need a count
+ // for particular facet values from), where possible, with
+ // the requests to get fields (because we know that is the
+ // only other required phase).
+ // We do this in distributedProcess so we can look at all of the
+ // requests in the outgoing queue at once.
+
+ assert rb.shards.length == facetState.mcontext.numShards;
+ for (String shard : rb.shards) {
+ facetState.mcontext.setShard(shard);
+
+ // shard-specific refinement
+ Map<String,Object> refinement = facetState.merger.getRefinement(facetState.mcontext);
+ if (refinement == null) continue; // nothing to refine on this shard
+
+ boolean newRequest = false;
+ ShardRequest shardsRefineRequest = null;
+
+ // try to find a request that is already going out to that shard.
+ // If nshards becomes too great, we may want to move to hashing for
+ // better scalability.
+ for (ShardRequest sreq : rb.outgoing) {
+ if ( (sreq.purpose & (ShardRequest.PURPOSE_GET_FIELDS|ShardRequest.PURPOSE_REFINE_FACETS|ShardRequest.PURPOSE_REFINE_PIVOT_FACETS)) != 0
+ && sreq.shards != null
+ && sreq.shards.length == 1
+ && sreq.shards[0].equals(shard))
+ {
+ shardsRefineRequest = sreq;
+ break;
+ }
+ }
+
+ if (shardsRefineRequest == null) {
+ // we didn't find any other suitable requests going out to that shard,
+ // so create one ourselves.
+ newRequest = true;
+ shardsRefineRequest = new ShardRequest();
+ shardsRefineRequest.shards = new String[] { shard };
+ shardsRefineRequest.params = new ModifiableSolrParams(rb.req.getParams());
+ // don't request any documents
+ shardsRefineRequest.params.remove(CommonParams.START);
+ shardsRefineRequest.params.set(CommonParams.ROWS, "0");
+ // switch off the FacetParams.FACET parameter on this refinement-only request
+ shardsRefineRequest.params.set(FacetParams.FACET, false);
+ }
+
+ shardsRefineRequest.purpose |= PURPOSE_REFINE_JSON_FACETS;
+
+ Map<String,Object> fstate = new HashMap<>(1);
+ fstate.put(FACET_REFINE, refinement);
+ String fstateString = JSONUtil.toJSON(fstate); // the refinement travels to the shard as JSON in the FACET_STATE param
+ shardsRefineRequest.params.add(FACET_STATE, fstateString);
+
+ if (newRequest) { // a reused request is already in rb.outgoing
+ rb.addRequest(this, shardsRefineRequest);
+ }
+ }
+
+ // clearFaceting(rb.outgoing);
return ResponseBuilder.STAGE_DONE;
}
@@ -165,11 +248,13 @@ public class FacetModule extends SearchComponent {
if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
sreq.purpose |= FacetModule.PURPOSE_GET_JSON_FACETS;
- sreq.params.set(FACET_STATE, "{}");
+ sreq.params.set(FACET_STATE, "{}"); // The presence of FACET_STATE (_facet_) turns on json faceting
} else {
// turn off faceting on other requests
- sreq.params.remove("json.facet");
+ /*** distributedProcess will need to use other requests for refinement
+ sreq.params.remove("json.facet"); // this just saves space... the presence of FACET_STATE really control the faceting
sreq.params.remove(FACET_STATE);
+ **/
}
}
@@ -186,8 +271,11 @@ public class FacetModule extends SearchComponent {
if (facet == null) continue;
if (facetState.merger == null) {
facetState.merger = facetState.facetRequest.createFacetMerger(facet);
+ facetState.mcontext = new FacetMerger.Context( sreq.responses.size() );
}
- facetState.merger.merge(facet , new FacetMerger.Context());
+ facetState.mcontext.root = facet;
+ facetState.mcontext.newShard(shardRsp.getShard());
+ facetState.merger.merge(facet , facetState.mcontext);
}
}
@@ -199,6 +287,7 @@ public class FacetModule extends SearchComponent {
if (facetState == null) return;
if (facetState.merger != null) {
+ // TODO: merge any refinements
rb.rsp.add("facets", facetState.merger.getMergedResult());
}
}
@@ -226,17 +315,23 @@ class FacetComponentState {
// Only used for distributed search
//
FacetMerger merger;
+ FacetMerger.Context mcontext;
}
-
+// base class for facet functions that can be used in a sort
abstract class FacetSortableMerger extends FacetMerger {
public void prepareSort() { // hook with an empty default body; nothing is prepared here
}
+ @Override
+ public void finish(Context mcontext) {
+ // nothing to do for simple stats...
+ }
+
/** Return the normal comparison sort order. The sort direction is only to be used in special circumstances (such as making NaN sort
* last regardless of sort order.) Normal sorters do not need to pay attention to direction.
*/
- public abstract int compareTo(FacetSortableMerger other, FacetField.SortDirection direction);
+ public abstract int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction);
}
abstract class FacetDoubleMerger extends FacetSortableMerger {
@@ -252,12 +347,12 @@ abstract class FacetDoubleMerger extends FacetSortableMerger {
@Override
- public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) {
+ public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) {
return compare(getDouble(), ((FacetDoubleMerger)other).getDouble(), direction);
}
- public static int compare(double a, double b, FacetField.SortDirection direction) {
+ public static int compare(double a, double b, FacetRequest.SortDirection direction) {
if (a < b) return -1;
if (a > b) return 1;
@@ -295,7 +390,7 @@ class FacetLongMerger extends FacetSortableMerger {
}
@Override
- public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) {
+ public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) {
return Long.compare(val, ((FacetLongMerger)other).val);
}
}
@@ -304,15 +399,20 @@ class FacetLongMerger extends FacetSortableMerger {
// base class for facets that create buckets (and can hence have sub-facets)
abstract class FacetBucketMerger<FacetRequestT extends FacetRequest> extends FacetMerger {
FacetRequestT freq;
- int bucketNumber;
public FacetBucketMerger(FacetRequestT freq) {
this.freq = freq;
}
/** Bucketval is the representative value for the bucket. Only applicable to terms and range queries to distinguish buckets. */
- FacetBucket newBucket(Comparable bucketVal) {
- return new FacetBucket(this, bucketVal, bucketNumber++);
+ FacetBucket newBucket(Comparable bucketVal, Context mcontext) {
+ return new FacetBucket(this, bucketVal, mcontext);
+ }
+
+ @Override
+ public Map<String, Object> getRefinement(Context mcontext) {
+ Collection<String> refineTags = mcontext.getSubsWithRefinement(freq);
+ return null; // FIXME
}
// do subs...
@@ -334,6 +434,7 @@ abstract class FacetBucketMerger<FacetRequestT extends FacetRequest> extends Fac
}
}
+
class FacetQueryMerger extends FacetBucketMerger<FacetQuery> {
FacetBucket bucket;
@@ -344,12 +445,33 @@ class FacetQueryMerger extends FacetBucketMerger<FacetQuery> {
@Override
public void merge(Object facet, Context mcontext) {
if (bucket == null) {
- bucket = newBucket(null);
+ bucket = newBucket(null, mcontext);
}
bucket.mergeBucket((SimpleOrderedMap) facet, mcontext);
}
@Override
+  public Map<String, Object> getRefinement(Context mcontext) {
+    // Pick which sub-facets to ask about:
+    //  - bucket was missing: every sub-facet that returns partial results, since those
+    //    need to list values for refinement;
+    //  - otherwise: only the sub-facets that have refinement.
+    boolean missing = mcontext.bucketWasMissing();
+    Collection<String> subTags = missing
+        ? mcontext.getSubsWithPartial(freq)
+        : mcontext.getSubsWithRefinement(freq);
+
+    // Delegate to the single bucket of this query facet; the result may be null.
+    return bucket.getRefinement(mcontext, subTags);
+  }
+
+
+ @Override
+ public void finish(Context mcontext) {
+ // FIXME we need to propagate!!!
+ }
+
+ @Override
public Object getMergedResult() {
return bucket.getMergedBucket();
}
@@ -360,15 +482,15 @@ class FacetQueryMerger extends FacetBucketMerger<FacetQuery> {
class FacetBucket {
final FacetBucketMerger parent;
final Comparable bucketValue;
- final int bucketNumber; // this is just for internal correlation (the first bucket created is bucket 0, the next bucket 1, etc)
+ final int bucketNumber; // this is just for internal correlation (the first bucket created is bucket 0, the next bucket 1, across all field buckets)
long count;
Map<String, FacetMerger> subs;
- public FacetBucket(FacetBucketMerger parent, Comparable bucketValue, int bucketNumber) {
+ public FacetBucket(FacetBucketMerger parent, Comparable bucketValue, FacetMerger.Context mcontext) {
this.parent = parent;
this.bucketValue = bucketValue;
- this.bucketNumber = bucketNumber;
+ this.bucketNumber = mcontext.getNewBucketNumber(); // TODO: we don't need bucket numbers for all buckets...
}
public long getCount() {
@@ -403,6 +525,8 @@ class FacetBucket {
public void mergeBucket(SimpleOrderedMap bucket, FacetMerger.Context mcontext) {
// todo: for refinements, we want to recurse, but not re-do stats for intermediate buckets
+ mcontext.setShardFlag(bucketNumber);
+
// drive merging off the received bucket?
for (int i=0; i<bucket.size(); i++) {
String key = bucket.getName(i);
@@ -440,368 +564,84 @@ class FacetBucket {
return out;
}
-}
-
-
-
-class FacetFieldMerger extends FacetBucketMerger<FacetField> {
- FacetBucket missingBucket;
- FacetBucket allBuckets;
- FacetMerger numBuckets;
-
- LinkedHashMap<Object,FacetBucket> buckets = new LinkedHashMap<>();
- List<FacetBucket> sortedBuckets;
- int numReturnedBuckets; // the number of buckets in the bucket lists returned from all of the shards
-
- private static class SortVal implements Comparable<SortVal> {
- FacetBucket bucket;
- FacetSortableMerger merger; // make this class inner and access merger , direction in parent?
- FacetField.SortDirection direction;
-
- @Override
- public int compareTo(SortVal o) {
- int c = -merger.compareTo(o.merger, direction) * direction.getMultiplier();
- return c == 0 ? bucket.bucketValue.compareTo(o.bucket.bucketValue) : c;
- }
- }
- public FacetFieldMerger(FacetField freq) {
- super(freq);
- }
-
- @Override
- public void merge(Object facetResult, Context mcontext) {
- merge((SimpleOrderedMap)facetResult, mcontext);
- }
-
- protected void merge(SimpleOrderedMap facetResult, Context mcontext) {
- if (freq.missing) {
- Object o = facetResult.get("missing");
- if (o != null) {
- if (missingBucket == null) {
- missingBucket = newBucket(null);
+ public Map<String, Object> getRefinement(FacetMerger.Context mcontext, Collection<String> refineTags) {
+ if (subs == null) {
+ return null;
+ }
+ Map<String,Object> refinement = null;
+ for (String tag : refineTags) {
+ FacetMerger subMerger = subs.get(tag);
+ if (subMerger != null) {
+ Map<String,Object> subRef = subMerger.getRefinement(mcontext);
+ if (subRef != null) {
+ if (refinement == null) {
+ refinement = new HashMap<>(refineTags.size());
+ }
+ refinement.put(tag, subRef);
}
- missingBucket.mergeBucket((SimpleOrderedMap)o , mcontext);
}
}
+ return refinement;
+ }
- if (freq.allBuckets) {
- Object o = facetResult.get("allBuckets");
- if (o != null) {
- if (allBuckets == null) {
- allBuckets = newBucket(null);
- }
- allBuckets.mergeBucket((SimpleOrderedMap)o , mcontext);
- }
- }
+ public Map<String, Object> getRefinement2(FacetMerger.Context mcontext, Collection<String> refineTags) {
+ // TODO nocommit - partial results should turn off refining!!!
- List<SimpleOrderedMap> bucketList = (List<SimpleOrderedMap>) facetResult.get("buckets");
- numReturnedBuckets += bucketList.size();
- mergeBucketList(bucketList , mcontext);
+ boolean parentMissing = mcontext.bucketWasMissing();
- if (freq.numBuckets) {
- Object nb = facetResult.get("numBuckets");
- if (nb != null) {
- if (numBuckets == null) {
- numBuckets = new FacetNumBucketsMerger();
- }
- numBuckets.merge(nb , mcontext);
+ // TODO: this is a redundant check for many types of facets... only do on field faceting
+ if (!parentMissing) {
+ // if parent bucket wasn't missing, check if this bucket was.
+ // this really only needs checking on certain buckets... (like terms facet)
+ boolean sawThisBucket = mcontext.getShardFlag(bucketNumber);
+ if (!sawThisBucket) {
+ mcontext.setBucketWasMissing(true);
}
+ } else {
+ // if parent bucket was missing, then we should be too
+ assert !mcontext.getShardFlag(bucketNumber);
}
- }
+ Map<String,Object> refinement = null;
- public void mergeBucketList(List<SimpleOrderedMap> bucketList, Context mcontext) {
- for (SimpleOrderedMap bucketRes : bucketList) {
- Comparable bucketVal = (Comparable)bucketRes.get("val");
- FacetBucket bucket = buckets.get(bucketVal);
- if (bucket == null) {
- bucket = newBucket(bucketVal);
- buckets.put(bucketVal, bucket);
+ if (!mcontext.bucketWasMissing()) {
+ // this is just a pass-through bucket... see if there is anything to do at all
+ if (subs == null || refineTags.isEmpty()) {
+ return null;
}
- bucket.mergeBucket( bucketRes , mcontext );
- }
- }
-
- public void sortBuckets() {
- sortedBuckets = new ArrayList<>( buckets.values() );
-
- Comparator<FacetBucket> comparator = null;
-
- final FacetField.SortDirection direction = freq.sortDirection;
- final int sortMul = direction.getMultiplier();
-
- if ("count".equals(freq.sortVariable)) {
- comparator = (o1, o2) -> {
- int v = -Long.compare(o1.count, o2.count) * sortMul;
- return v == 0 ? o1.bucketValue.compareTo(o2.bucketValue) : v;
- };
- Collections.sort(sortedBuckets, comparator);
- } else if ("index".equals(freq.sortVariable)) {
- comparator = (o1, o2) -> -o1.bucketValue.compareTo(o2.bucketValue) * sortMul;
- Collections.sort(sortedBuckets, comparator);
} else {
- final String key = freq.sortVariable;
-
- /**
- final FacetSortableMerger[] arr = new FacetSortableMerger[buckets.size()];
- final int[] index = new int[arr.length];
- int start = 0;
- int nullStart = index.length;
- int i=0;
- for (FacetBucket bucket : buckets.values()) {
- FacetMerger merger = bucket.getExistingMerger(key);
- if (merger == null) {
- index[--nullStart] = i;
- }
- if (merger != null) {
- arr[start] = (FacetSortableMerger)merger;
- index[start] = i;
- start++;
- }
- i++;
+ // for missing bucket, go over all sub-facts
+ refineTags = null;
+ refinement = new HashMap<>(4);
+ if (bucketValue != null) {
+ refinement.put("_v", bucketValue);
}
-
- PrimUtils.sort(0, nullStart, index, new PrimUtils.IntComparator() {
- @Override
- public int compare(int a, int b) {
- return arr[index[a]].compareTo(arr[index[b]], direction);
- }
- });
- **/
-
- // timsort may do better here given that the lists may be partially sorted.
-
- List<SortVal> lst = new ArrayList<SortVal>(buckets.size());
- List<FacetBucket> nulls = new ArrayList<FacetBucket>(buckets.size()>>1);
- for (int i=0; i<sortedBuckets.size(); i++) {
- FacetBucket bucket = sortedBuckets.get(i);
- FacetMerger merger = bucket.getExistingMerger(key);
- if (merger == null) {
- nulls.add(bucket);
- }
- if (merger != null) {
- SortVal sv = new SortVal();
- sv.bucket = bucket;
- sv.merger = (FacetSortableMerger)merger;
- sv.direction = direction;
- // sv.pos = i; // if we need position in the future...
- lst.add(sv);
- }
- }
- Collections.sort(lst);
- Collections.sort(nulls, (o1, o2) -> o1.bucketValue.compareTo(o2.bucketValue));
-
- ArrayList<FacetBucket> out = new ArrayList<>(buckets.size());
- for (SortVal sv : lst) {
- out.add( sv.bucket );
- }
- out.addAll(nulls);
- sortedBuckets = out;
+ refinement.put("_m",1);
}
- }
- @Override
- public Object getMergedResult() {
- SimpleOrderedMap result = new SimpleOrderedMap();
+ // TODO: listing things like sub-facets that have no field facets are redundant
+ // (we only need facet that have variable values)
- if (numBuckets != null) {
- int removed = 0;
- if (freq.mincount > 1) {
- for (FacetBucket bucket : buckets.values()) {
- if (bucket.count < freq.mincount) removed++;
- }
- }
- result.add("numBuckets", ((Number)numBuckets.getMergedResult()).longValue() - removed);
-
- // TODO: we can further increase this estimate.
- // If not sorting by count, use a simple ratio to scale
- // If sorting by count desc, then add up the highest_possible_missing_count from each shard
- }
-
- sortBuckets();
-
- int first = (int)freq.offset;
- int end = freq.limit >=0 ? first + (int) freq.limit : Integer.MAX_VALUE;
- int last = Math.min(sortedBuckets.size(), end);
-
- List<SimpleOrderedMap> resultBuckets = new ArrayList<>(Math.max(0, (last - first)));
-
- /** this only works if there are no filters (like mincount)
- for (int i=first; i<last; i++) {
- FacetBucket bucket = sortedBuckets.get(i);
- resultBuckets.add( bucket.getMergedBucket() );
- }
- ***/
-
- // TODO: change effective offsets + limits at shards...
-
- int off = (int)freq.offset;
- int lim = freq.limit >= 0 ? (int)freq.limit : Integer.MAX_VALUE;
- for (FacetBucket bucket : sortedBuckets) {
- if (bucket.getCount() < freq.mincount) {
+ for (Map.Entry<String,FacetMerger> sub : subs.entrySet()) {
+ if (refineTags != null && !refineTags.contains(sub.getKey())) {
continue;
}
-
- if (off > 0) {
- --off;
- continue;
- }
-
- if (resultBuckets.size() >= lim) {
- break;
- }
-
- resultBuckets.add( bucket.getMergedBucket() );
- }
-
-
- result.add("buckets", resultBuckets);
- if (missingBucket != null) {
- result.add("missing", missingBucket.getMergedBucket());
- }
- if (allBuckets != null) {
- result.add("allBuckets", allBuckets.getMergedBucket());
- }
-
- return result;
- }
-
-
- private class FacetNumBucketsMerger extends FacetMerger {
- long sumBuckets;
- long shardsMissingSum;
- long shardsTruncatedSum;
- Set<Object> values;
-
- @Override
- public void merge(Object facetResult, Context mcontext) {
- SimpleOrderedMap map = (SimpleOrderedMap)facetResult;
- long numBuckets = ((Number)map.get("numBuckets")).longValue();
- sumBuckets += numBuckets;
-
- List vals = (List)map.get("vals");
- if (vals != null) {
- if (values == null) {
- values = new HashSet<>(vals.size()*4);
+ Map<String,Object> subRef = sub.getValue().getRefinement(mcontext);
+ if (subRef != null) {
+ if (refinement == null) {
+ refinement = new HashMap<>(4);
}
- values.addAll(vals);
- if (numBuckets > values.size()) {
- shardsTruncatedSum += numBuckets - values.size();
- }
- } else {
- shardsMissingSum += numBuckets;
+ refinement.put(sub.getKey(), subRef);
}
}
- @Override
- public Object getMergedResult() {
- long exactCount = values == null ? 0 : values.size();
- return exactCount + shardsMissingSum + shardsTruncatedSum;
- // TODO: reduce count by (at least) number of buckets that fail to hit mincount (after merging)
- // that should make things match for most of the small tests at least
- }
- }
-}
-
-
-class FacetRangeMerger extends FacetBucketMerger<FacetRange> {
- FacetBucket beforeBucket;
- FacetBucket afterBucket;
- FacetBucket betweenBucket;
-
- LinkedHashMap<Object, FacetBucket> buckets = new LinkedHashMap<Object, FacetBucket>();
-
- public FacetRangeMerger(FacetRange freq) {
- super(freq);
+ // reset the "bucketMissing" flag on the way back out.
+ mcontext.setBucketWasMissing(parentMissing);
+ return refinement;
}
- @Override
- FacetMerger createFacetMerger(String key, Object val) {
- return super.createFacetMerger(key, val);
- }
-
- @Override
- public void merge(Object facetResult, Context mcontext) {
- merge((SimpleOrderedMap) facetResult , mcontext);
- }
-
- public void merge(SimpleOrderedMap facetResult, Context mcontext) {
- boolean all = freq.others.contains(FacetParams.FacetRangeOther.ALL);
-
- if (all || freq.others.contains(FacetParams.FacetRangeOther.BEFORE)) {
- Object o = facetResult.get("before");
- if (o != null) {
- if (beforeBucket == null) {
- beforeBucket = newBucket(null);
- }
- beforeBucket.mergeBucket((SimpleOrderedMap)o, mcontext);
- }
- }
-
- if (all || freq.others.contains(FacetParams.FacetRangeOther.AFTER)) {
- Object o = facetResult.get("after");
- if (o != null) {
- if (afterBucket == null) {
- afterBucket = newBucket(null);
- }
- afterBucket.mergeBucket((SimpleOrderedMap)o , mcontext);
- }
- }
-
- if (all || freq.others.contains(FacetParams.FacetRangeOther.BETWEEN)) {
- Object o = facetResult.get("between");
- if (o != null) {
- if (betweenBucket == null) {
- betweenBucket = newBucket(null);
- }
- betweenBucket.mergeBucket((SimpleOrderedMap)o , mcontext);
- }
- }
-
- List<SimpleOrderedMap> bucketList = (List<SimpleOrderedMap>) facetResult.get("buckets");
- mergeBucketList(bucketList , mcontext);
- }
-
- // TODO: share more merging with field faceting
- public void mergeBucketList(List<SimpleOrderedMap> bucketList, Context mcontext) {
- for (SimpleOrderedMap bucketRes : bucketList) {
- Comparable bucketVal = (Comparable)bucketRes.get("val");
- FacetBucket bucket = buckets.get(bucketVal);
- if (bucket == null) {
- bucket = newBucket(bucketVal);
- buckets.put(bucketVal, bucket);
- }
- bucket.mergeBucket( bucketRes , mcontext );
- }
- }
-
- @Override
- public Object getMergedResult() {
- SimpleOrderedMap result = new SimpleOrderedMap(4);
-
- List<SimpleOrderedMap> resultBuckets = new ArrayList<>(buckets.size());
-
- for (FacetBucket bucket : buckets.values()) {
- if (bucket.getCount() < freq.mincount) {
- continue;
- }
- resultBuckets.add( bucket.getMergedBucket() );
- }
+}
- result.add("buckets", resultBuckets);
- if (beforeBucket != null) {
- result.add("before", beforeBucket.getMergedBucket());
- }
- if (afterBucket != null) {
- result.add("after", afterBucket.getMergedBucket());
- }
- if (betweenBucket != null) {
- result.add("between", betweenBucket.getMergedBucket());
- }
- return result;
-
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d29fd87/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
index 1b98de0..99f6fce 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
@@ -36,7 +36,7 @@ import org.apache.solr.schema.TrieField;
import org.apache.solr.search.DocSet;
import org.apache.solr.util.DateMathParser;
-public class FacetRange extends FacetRequest {
+public class FacetRange extends FacetRequestSorted {
String field;
Object start;
Object end;
@@ -44,8 +44,12 @@ public class FacetRange extends FacetRequest {
boolean hardend = false;
EnumSet<FacetParams.FacetRangeInclude> include;
EnumSet<FacetParams.FacetRangeOther> others;
- long mincount = 0;
+ {
+ // defaults
+ mincount = 0;
+ limit = -1;
+ }
@Override
public FacetProcessor createFacetProcessor(FacetContext fcontext) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d29fd87/solr/core/src/java/org/apache/solr/search/facet/FacetRangeMerger.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRangeMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRangeMerger.java
new file mode 100644
index 0000000..587b919
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRangeMerger.java
@@ -0,0 +1,123 @@
+package org.apache.solr.search.facet;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+
+import org.apache.solr.common.params.FacetParams;
+import org.apache.solr.common.util.SimpleOrderedMap;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Merges per-shard range facet responses: the regular range buckets (kept by the parent
+ * class in arrival order) plus the optional "before", "after" and "between" buckets.
+ */
+public class FacetRangeMerger extends FacetRequestSortedMerger<FacetRange> {
+  FacetBucket beforeBucket;
+  FacetBucket afterBucket;
+  FacetBucket betweenBucket;
+
+  public FacetRangeMerger(FacetRange freq) {
+    super(freq);
+  }
+
+  @Override
+  FacetMerger createFacetMerger(String key, Object val) {
+    return super.createFacetMerger(key, val);
+  }
+
+  @Override
+  public void merge(Object facetResult, Context mcontext) {
+    merge((SimpleOrderedMap) facetResult , mcontext);
+  }
+
+  /** Range buckets are not re-ordered: the "sorted" list is simply the buckets in map order. */
+  @Override
+  public void sortBuckets() {
+    // TODO: mincount>0 will mess up order?
+    sortedBuckets = new ArrayList<>( buckets.values() );
+  }
+
+  @Override
+  public void finish(Context mcontext) {
+    // nothing to do
+  }
+
+  /**
+   * Folds one shard's range facet response into this merger.
+   *
+   * @param facetResult the shard response; read for "before", "after", "between" and "buckets"
+   * @param mcontext    merge context handed through to every bucket that is created or merged
+   */
+  public void merge(SimpleOrderedMap facetResult, Context mcontext) {
+    final boolean wantAll = freq.others.contains(FacetParams.FacetRangeOther.ALL);
+
+    if (wantAll || freq.others.contains(FacetParams.FacetRangeOther.BEFORE)) {
+      beforeBucket = mergeOther(beforeBucket, facetResult.get("before"), mcontext);
+    }
+
+    if (wantAll || freq.others.contains(FacetParams.FacetRangeOther.AFTER)) {
+      afterBucket = mergeOther(afterBucket, facetResult.get("after"), mcontext);
+    }
+
+    if (wantAll || freq.others.contains(FacetParams.FacetRangeOther.BETWEEN)) {
+      betweenBucket = mergeOther(betweenBucket, facetResult.get("between"), mcontext);
+    }
+
+    List<SimpleOrderedMap> shardBuckets = (List<SimpleOrderedMap>) facetResult.get("buckets");
+    mergeBucketList(shardBuckets , mcontext);
+  }
+
+  /**
+   * Merges one of the "other" buckets (before/after/between) from a shard response.
+   * A merged bucket is only created once a shard actually returns that bucket.
+   *
+   * @return the merged bucket to store back: {@code target} unchanged when the shard sent nothing
+   */
+  private FacetBucket mergeOther(FacetBucket target, Object shardBucket, Context mcontext) {
+    if (shardBucket == null) {
+      return target;
+    }
+    FacetBucket merged = target;
+    if (merged == null) {
+      merged = newBucket(null, mcontext);
+    }
+    merged.mergeBucket((SimpleOrderedMap) shardBucket, mcontext);
+    return merged;
+  }
+
+
+  /**
+   * Builds the merged response: "buckets" (those whose count reaches {@code freq.mincount}),
+   * followed by "before", "after" and "between" when any shard supplied them.
+   */
+  @Override
+  public Object getMergedResult() {
+    // TODO: use sortedBuckets
+    SimpleOrderedMap result = new SimpleOrderedMap(4);
+
+    List<SimpleOrderedMap> resultBuckets = new ArrayList<>(buckets.size());
+    for (FacetBucket bucket : buckets.values()) {
+      if (bucket.getCount() >= freq.mincount) {
+        resultBuckets.add( bucket.getMergedBucket() );
+      }
+    }
+    result.add("buckets", resultBuckets);
+
+    if (beforeBucket != null) {
+      result.add("before", beforeBucket.getMergedBucket());
+    }
+    if (afterBucket != null) {
+      result.add("after", afterBucket.getMergedBucket());
+    }
+    if (betweenBucket != null) {
+      result.add("between", betweenBucket.getMergedBucket());
+    }
+    return result;
+  }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d29fd87/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
index 0446202..76d7d2a 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
@@ -35,10 +35,48 @@ import org.apache.solr.search.QueryContext;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
+import static org.apache.solr.search.facet.FacetRequest.RefineMethod.NONE;
+
public abstract class FacetRequest {
+
+ /** Sort order for facet buckets; each constant carries a sign factor used when comparing. */
+ public enum SortDirection {
+   asc(-1),
+   desc(1);
+
+   private final int multiplier;
+
+   SortDirection(int multiplier) {
+     this.multiplier = multiplier;
+   }
+
+   /** Returns the sign factor of this direction: -1 for {@code asc}, 1 for {@code desc}. */
+   public int getMultiplier() {
+     return multiplier;
+   }
+ }
+
+ public static enum RefineMethod {
+ NONE,
+ SIMPLE;
+ // NONE is distinct from null since we may want to know if refinement was explicitly turned off.
+ public static FacetRequest.RefineMethod fromObj(Object method) {
+ if (method == null) return null;
+ if (method instanceof Boolean) {
+ return ((Boolean)method) ? SIMPLE : NONE;
+ }
+ if ("simple".equals(method)) {
+ return SIMPLE;
+ } else if ("none".equals(method)) {
+ return NONE;
+ } else {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown RefineMethod method " + method);
+ }
+ }
+ }
+
+
protected Map<String,AggValueSource> facetStats; // per-bucket statistics
- protected Map<String,FacetRequest> subFacets; // list of facets
+ protected Map<String,FacetRequest> subFacets; // per-bucket sub-facets
protected List<String> filters;
protected boolean processEmpty;
protected Domain domain;
@@ -64,6 +102,22 @@ public abstract class FacetRequest {
return subFacets;
}
+ /**
+ * Returns the refinement method of this facet, or null if unset.
+ * This base implementation always returns null.
+ */
+ public RefineMethod getRefineMethod() {
+ return null;
+ }
+
+ /** Returns true when a refine method is set and it is something other than NONE. */
+ public boolean doRefine() {
+   final RefineMethod method = getRefineMethod();
+   return method != null && method != NONE;
+ }
+
+ /** Returns true if this facet can return just some of the facet buckets that match all the criteria.
+ * This is normally true only for facets with a limit.
+ * This base implementation always returns false.
+ */
+ public boolean returnsPartial() {
+ return false;
+ }
+
public void addStat(String key, AggValueSource stat) {
facetStats.put(key, stat);
}
@@ -541,6 +595,9 @@ class FacetFieldParser extends FacetParser<FacetField> {
facet.method = FacetField.FacetMethod.fromString(getString(m, "method", null));
facet.cacheDf = (int)getLong(m, "cacheDf", facet.cacheDf);
+ // TODO: pull up to higher level?
+ facet.refine = FacetField.RefineMethod.fromObj(m.get("refine"));
+
facet.perSeg = (Boolean)m.get("perSeg");
// facet.sort may depend on a facet stat...
@@ -562,18 +619,18 @@ class FacetFieldParser extends FacetParser<FacetField> {
private void parseSort(Object sort) {
if (sort == null) {
facet.sortVariable = "count";
- facet.sortDirection = FacetField.SortDirection.desc;
+ facet.sortDirection = FacetRequest.SortDirection.desc;
} else if (sort instanceof String) {
String sortStr = (String)sort;
if (sortStr.endsWith(" asc")) {
facet.sortVariable = sortStr.substring(0, sortStr.length()-" asc".length());
- facet.sortDirection = FacetField.SortDirection.asc;
+ facet.sortDirection = FacetRequest.SortDirection.asc;
} else if (sortStr.endsWith(" desc")) {
facet.sortVariable = sortStr.substring(0, sortStr.length()-" desc".length());
- facet.sortDirection = FacetField.SortDirection.desc;
+ facet.sortDirection = FacetRequest.SortDirection.desc;
} else {
facet.sortVariable = sortStr;
- facet.sortDirection = "index".equals(facet.sortVariable) ? FacetField.SortDirection.asc : FacetField.SortDirection.desc; // default direction for "index" is ascending
+ facet.sortDirection = "index".equals(facet.sortVariable) ? FacetRequest.SortDirection.asc : FacetRequest.SortDirection.desc; // default direction for "index" is ascending
}
} else {
// sort : { myvar : 'desc' }
@@ -583,7 +640,7 @@ class FacetFieldParser extends FacetParser<FacetField> {
String k = entry.getKey();
Object v = entry.getValue();
facet.sortVariable = k;
- facet.sortDirection = FacetField.SortDirection.valueOf(v.toString());
+ facet.sortDirection = FacetRequest.SortDirection.valueOf(v.toString());
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d29fd87/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java b/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java
new file mode 100644
index 0000000..955882d
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetRequestSortedMerger.java
@@ -0,0 +1,234 @@
+package org.apache.solr.search.facet;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.solr.common.util.SimpleOrderedMap;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Base class for facets that create a list of buckets that can be sorted.
+ * Buckets are keyed by their value and kept in the order they were first seen;
+ * {@link #sortBuckets()} derives the ordered list on demand.
+ */
+abstract class FacetRequestSortedMerger<FacetRequestT extends FacetRequestSorted> extends FacetBucketMerger<FacetRequestT> {
+  LinkedHashMap<Object,FacetBucket> buckets = new LinkedHashMap<>();
+  List<FacetBucket> sortedBuckets;
+
+  public FacetRequestSortedMerger(FacetRequestT freq) {
+    super(freq);
+  }
+
+  /** Pairs a bucket with the merger of the stat it is sorted by. */
+  private static class SortVal implements Comparable<SortVal> {
+    FacetBucket bucket;
+    FacetSortableMerger merger; // make this class inner and access merger , direction in parent?
+    FacetRequest.SortDirection direction;
+
+    @Override
+    public int compareTo(SortVal o) {
+      int c = -merger.compareTo(o.merger, direction) * direction.getMultiplier();
+      // ties fall back to the bucket value so the order is deterministic
+      return c == 0 ? bucket.bucketValue.compareTo(o.bucket.bucketValue) : c;
+    }
+  }
+
+  /**
+   * Merges a shard's bucket list into {@link #buckets}, creating a merged bucket the first
+   * time a value (the "val" entry of a shard bucket) is seen.
+   */
+  public void mergeBucketList(List<SimpleOrderedMap> bucketList, Context mcontext) {
+    for (SimpleOrderedMap bucketRes : bucketList) {
+      Comparable bucketVal = (Comparable)bucketRes.get("val");
+      FacetBucket bucket = buckets.get(bucketVal);
+      if (bucket == null) {
+        bucket = newBucket(bucketVal, mcontext);
+        buckets.put(bucketVal, bucket);
+      }
+      bucket.mergeBucket( bucketRes , mcontext );
+    }
+  }
+
+  /**
+   * Populates {@link #sortedBuckets} from {@link #buckets} according to
+   * {@code freq.sortVariable} and {@code freq.sortDirection}: by "count" (ties broken by
+   * bucket value), by "index" (bucket value), or by a named stat. When sorting by a stat,
+   * buckets that have no merger for it are placed last, ordered by bucket value.
+   */
+  public void sortBuckets() {
+    sortedBuckets = new ArrayList<>( buckets.values() );
+
+    final FacetRequest.SortDirection direction = freq.sortDirection;
+    final int sortMul = direction.getMultiplier();
+
+    if ("count".equals(freq.sortVariable)) {
+      Comparator<FacetBucket> comparator = (o1, o2) -> {
+        int v = -Long.compare(o1.count, o2.count) * sortMul;
+        return v == 0 ? o1.bucketValue.compareTo(o2.bucketValue) : v;
+      };
+      Collections.sort(sortedBuckets, comparator);
+    } else if ("index".equals(freq.sortVariable)) {
+      Comparator<FacetBucket> comparator = (o1, o2) -> -o1.bucketValue.compareTo(o2.bucketValue) * sortMul;
+      Collections.sort(sortedBuckets, comparator);
+    } else {
+      final String key = freq.sortVariable;
+
+      List<SortVal> lst = new ArrayList<>(buckets.size());
+      List<FacetBucket> nulls = new ArrayList<>(buckets.size()>>1);
+      for (FacetBucket bucket : sortedBuckets) {
+        FacetMerger merger = bucket.getExistingMerger(key);
+        if (merger == null) {
+          nulls.add(bucket);
+        } else {
+          SortVal sv = new SortVal();
+          sv.bucket = bucket;
+          sv.merger = (FacetSortableMerger)merger;
+          sv.direction = direction;
+          lst.add(sv);
+        }
+      }
+      Collections.sort(lst);
+      Collections.sort(nulls, (o1, o2) -> o1.bucketValue.compareTo(o2.bucketValue));
+
+      ArrayList<FacetBucket> out = new ArrayList<>(buckets.size());
+      for (SortVal sv : lst) {
+        out.add( sv.bucket );
+      }
+      out.addAll(nulls);
+      sortedBuckets = out;
+    }
+  }
+
+
+  /**
+   * Builds the refinement request for the shard currently described by {@code mcontext}.
+   *
+   * <p>The result map may contain:
+   * <ul>
+   *   <li>"_l": values of top buckets the shard did not return and that need nothing further specified</li>
+   *   <li>"_m": refinements for top buckets the shard did not return whose partial sub-facets must list values</li>
+   *   <li>"_s": refinements for top buckets the shard did return but whose sub-facets need refining</li>
+   * </ul>
+   *
+   * @return the refinement map, or null when nothing needs to be requested from this shard
+   */
+  @Override
+  public Map<String, Object> getRefinement(Context mcontext) {
+    // step 1) If this facet request has refining, then we need to fully request top buckets that were not seen by this shard.
+    // step 2) If this facet does not have refining, but some sub-facets do, we need to check/recurse those sub-facets in *every* top bucket.
+    // A combination of the two is possible and makes step 2 redundant for any buckets we fully requested in step 1.
+
+    Map<String,Object> refinement = null;
+
+    Collection<String> tags = mcontext.getSubsWithRefinement(freq);
+    if (tags.isEmpty() && !freq.doRefine()) {
+      // we don't have refining, and neither do our subs
+      return null;
+    }
+
+    // Tags for sub facets that have partial facets somewhere in their children.
+    // If we are missing a bucket for this shard, we'll need to get the specific buckets that need refining.
+    Collection<String> tagsWithPartial = mcontext.getSubsWithPartial(freq);
+
+    boolean thisMissing = mcontext.bucketWasMissing();
+
+    // A negative limit means "no limit", so every merged bucket is a top bucket.
+    // (Adding a negative limit to the offset would yield a negative bound and skip all buckets.)
+    // The sum stays a long until it has been clamped by the number of buckets.
+    long wanted = freq.limit < 0 ? Integer.MAX_VALUE : freq.offset + freq.limit;
+    int numBucketsToCheck = (int) Math.min((long) buckets.size(), wanted);
+
+    Collection<FacetBucket> bucketList;
+    if (buckets.size() < wanted) {
+      // no need to sort
+      // todo: but we may need to filter.... simplify by always sorting?
+      bucketList = buckets.values();
+    } else {
+      // only sort once
+      if (sortedBuckets == null) {
+        sortBuckets();  // todo: make sure this filters buckets as well
+      }
+      bucketList = sortedBuckets;
+    }
+
+    ArrayList<Object> leafBuckets = null;    // "_l" missing buckets specified by bucket value only (no need to specify anything further)
+    ArrayList<Object> missingBuckets = null; // "_m" missing buckets that need to specify values for partial facets
+    ArrayList<Object> skipBuckets = null;    // "_s" present buckets that we need to recurse into because children facets have refinement requirements
+
+    for (FacetBucket bucket : bucketList) {
+      if (numBucketsToCheck-- <= 0) break;
+      // if the enclosing bucket was missing on this shard, none of its buckets can have been seen
+      assert !thisMissing || !mcontext.getShardFlag(bucket.bucketNumber);
+      boolean saw = !thisMissing && mcontext.getShardFlag(bucket.bucketNumber);
+      if (!saw) {
+        // we didn't see the bucket for this shard
+        Map<String,Object> bucketRefinement = null;
+
+        // find facets that we need to fill in buckets for
+        if (!tagsWithPartial.isEmpty()) {
+          boolean prev = mcontext.setBucketWasMissing(true);
+          bucketRefinement = bucket.getRefinement(mcontext, tagsWithPartial);
+          mcontext.setBucketWasMissing(prev);
+
+          if (bucketRefinement != null) {
+            if (missingBuckets==null) missingBuckets = new ArrayList<>();
+            missingBuckets.add(bucketRefinement);
+          }
+        }
+
+        // if we didn't add to "_m" (missing), then we should add to "_l" (leaf missing)
+        if (bucketRefinement == null) {
+          if (leafBuckets == null) leafBuckets = new ArrayList<>();
+          leafBuckets.add(bucket.bucketValue);
+        }
+
+      } else if (!tags.isEmpty()) {
+        // we had this bucket, but we need to recurse to certain children that have refinements
+        Map<String,Object> bucketRefinement = bucket.getRefinement(mcontext, tagsWithPartial);
+        if (bucketRefinement != null) {
+          if (skipBuckets == null) skipBuckets = new ArrayList<>();
+          skipBuckets.add(bucketRefinement);
+        }
+      }
+
+    }
+
+    // TODO: what if we don't need to refine any variable buckets, but we do need to contribute to numBuckets, missing, allBuckets, etc...
+    // because we were "missing". That will be handled at a higher level (i.e. we'll be in someone's missing bucket?)
+    // TODO: test with a sub-facet with a limit of 0 and something like a missing bucket
+    if (leafBuckets != null || missingBuckets != null || skipBuckets != null) {
+      refinement = new HashMap<>(3);
+      if (leafBuckets != null) refinement.put("_l",leafBuckets);
+      if (missingBuckets != null) refinement.put("_m", missingBuckets);
+      if (skipBuckets != null) refinement.put("_s", skipBuckets);
+    }
+
+    return refinement;
+  }
+
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d29fd87/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java b/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
index 09436c1..89e2386 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/HLLAgg.java
@@ -99,7 +99,7 @@ public class HLLAgg extends StrAggValueSource {
}
@Override
- public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) {
+ public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) {
return Long.compare( getLong(), ((Merger)other).getLong() );
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d29fd87/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java b/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java
index 6285b39..a1f44f0 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/PercentileAgg.java
@@ -207,7 +207,7 @@ public class PercentileAgg extends SimpleAggValueSource {
}
@Override
- public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) {
+ public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) {
return Double.compare(getSortVal(), ((Merger) other).getSortVal());
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d29fd87/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
index 261ed60..341bdaf 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
@@ -113,7 +113,7 @@ public class UniqueAgg extends StrAggValueSource {
}
@Override
- public int compareTo(FacetSortableMerger other, FacetField.SortDirection direction) {
+ public int compareTo(FacetSortableMerger other, FacetRequest.SortDirection direction) {
return Long.compare( getLong(), ((Merger)other).getLong() );
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5d29fd87/solr/test-framework/src/java/org/apache/solr/JSONTestUtil.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/JSONTestUtil.java b/solr/test-framework/src/java/org/apache/solr/JSONTestUtil.java
index 634608b..f5b2ffb 100644
--- a/solr/test-framework/src/java/org/apache/solr/JSONTestUtil.java
+++ b/solr/test-framework/src/java/org/apache/solr/JSONTestUtil.java
@@ -74,6 +74,19 @@ public class JSONTestUtil {
}
/**
+ * Splits {@code pathAndExpected} at the first occurrence of "==", parses the expected part as
+ * JSON, and delegates the comparison to {@code matchObj(path, input, expectObj, delta)}.
+ * If no "==" is present, the whole string is treated as the expected value and a null path
+ * is passed to the delegate.
+ * The expected value is built with {@code NoDupsObjectBuilder} when {@code failRepeatedKeys}
+ * is set, and with {@code ObjectBuilder.fromJSON} otherwise.
+ * NOTE(review): presumably the NoDups builder rejects repeated keys in the expected JSON;
+ * confirm against its definition.
+ *
+ * @param input Object structure to parse and test against
+ * @param pathAndExpected JSON path expression + '==' + expected value
+ * @param delta tolerance allowed in comparing float/double values
+ * @return whatever the delegated {@code matchObj(path, input, expectObj, delta)} call returns
+ *         (NOTE(review): presumably null on a match and an error description otherwise;
+ *         confirm against that overload)
+ * @throws Exception may be propagated from parsing the expected value or from the delegated
+ *         comparison
+ */
+ public static String matchObj(Object input, String pathAndExpected, double delta) throws Exception {
+ int pos = pathAndExpected.indexOf("==");
+ String path = pos>=0 ? pathAndExpected.substring(0,pos) : null;
+ String expected = pos>=0 ? pathAndExpected.substring(pos+2) : pathAndExpected;
+ Object expectObj = failRepeatedKeys ? new NoDupsObjectBuilder(new JSONParser(expected)).getVal() : ObjectBuilder.fromJSON(expected);
+ return matchObj(path, input, expectObj, delta);
+ }
+
+ /**
* @param path JSON path expression
* @param input JSON Structure to parse and test against
* @param expected expected value of path