You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2015/04/29 15:28:46 UTC

svn commit: r1676743 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/search/facet/ core/src/test/org/apache/solr/search/facet/

Author: yonik
Date: Wed Apr 29 13:28:33 2015
New Revision: 1676743

URL: http://svn.apache.org/r1676743
Log:
SOLR-7477: implement facet excludeTags

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1676743&r1=1676742&r2=1676743&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Apr 29 13:28:33 2015
@@ -153,6 +153,14 @@ New Features
 
 * SOLR-7437: Make HDFS transaction log replication factor configurable. (Mark Miller)
 
+* SOLR-7477: Multi-select faceting support for the Facet Module via the "excludeTags"
+  parameter which disregards any matching tagged filters for that facet.  Example:
+  & q=shoes
+  & fq={!tag=COLOR}color:blue
+  & json.facet={ colors:{type:terms, field:color, excludeTags:COLOR} }
+  (yonik)
+
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetField.java?rev=1676743&r1=1676742&r2=1676743&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetField.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetField.java Wed Apr 29 13:28:33 2015
@@ -194,6 +194,7 @@ abstract class FacetFieldProcessorFCBase
 
   @Override
   public void process() throws IOException {
+    super.process();
     sf = fcontext.searcher.getSchema().getField(freq.field);
     response = getFieldCacheCounts();
   }
@@ -340,10 +341,9 @@ abstract class FacetFieldProcessorFCBase
 
       // handle sub-facets for this bucket
       if (freq.getSubFacets().size() > 0) {
-        FacetContext subContext = fcontext.sub();
-        subContext.base = fcontext.searcher.getDocSet(new TermQuery(new Term(sf.getName(), br.clone())), fcontext.base);
+        TermQuery filter = new TermQuery(new Term(sf.getName(), br.clone()));
         try {
-          fillBucketSubs(bucket, subContext);
+          processSubs(bucket, filter, fcontext.searcher.getDocSet(filter, fcontext.base) );
         } finally {
           // subContext.base.decref();  // OFF-HEAP
           // subContext.base = null;  // do not modify context after creation... there may be deferred execution (i.e. streaming)
@@ -368,13 +368,11 @@ abstract class FacetFieldProcessorFCBase
         }
 
         if (freq.getSubFacets().size() > 0) {
-          FacetContext subContext = fcontext.sub();
           // TODO: we can do better than this!
           if (missingDocSet == null) {
             missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
           }
-          subContext.base = missingDocSet;
-          fillBucketSubs(missingBucket, subContext);
+          processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
         }
 
         res.add("missing", missingBucket);
@@ -542,6 +540,8 @@ class FacetFieldProcessorStream extends
 
   @Override
   public void process() throws IOException {
+    super.process();
+
     // We need to keep the fcontext open after processing is done (since we will be streaming in the response writer).
     // But if the connection is broken, we want to clean up.
     // fcontext.base.incref();  // OFF-HEAP
@@ -790,13 +790,15 @@ class FacetFieldProcessorStream extends
 
         // OK, we have a good bucket to return... first get bucket value before moving to next term
         Object bucketVal = sf.getType().toObject(sf, term);
+        BytesRef termCopy = BytesRef.deepCopyOf(term);
         term = termsEnum.next();
 
         SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
         bucket.add("val", bucketVal);
         addStats(bucket, 0);
         if (hasSubFacets) {
-          processSubs(bucket, termSet);
+          TermQuery filter = new TermQuery(new Term(freq.field, termCopy));
+          processSubs(bucket, filter, termSet);
         }
 
         // TODO... termSet needs to stick around for streaming sub-facets?

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java?rev=1676743&r1=1676742&r2=1676743&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetQuery.java Wed Apr 29 13:28:33 2015
@@ -52,6 +52,7 @@ class FacetQueryProcessor extends FacetP
 
   @Override
   public void process() throws IOException {
+    super.process();
     response = new SimpleOrderedMap<>();
     fillBucket(response, freq.q);
   }

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java?rev=1676743&r1=1676742&r2=1676743&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRange.java Wed Apr 29 13:28:33 2015
@@ -70,6 +70,8 @@ class FacetRangeProcessor extends FacetP
 
   @Override
   public void process() throws IOException {
+    super.process();
+
     // Under the normal mincount=0, each shard will need to return 0 counts since we don't calculate buckets at the top level.
     // But if mincount>0 then our sub mincount can be set to 1.
 
@@ -223,6 +225,9 @@ class FacetRangeProcessor extends FacetP
 
     int slotCount = rangeList.size() + otherList.size();
     intersections = new DocSet[slotCount];
+    filters = new Query[slotCount];
+
+
     createAccs(fcontext.base.size(), slotCount);
     prepareForCollection();
 
@@ -261,12 +266,14 @@ class FacetRangeProcessor extends FacetP
     return res;
   }
 
+  private Query[] filters;
   private DocSet[] intersections;
   private void rangeStats(Range range, int slot) throws IOException {
     Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
     // TODO: specialize count only
     DocSet intersection = fcontext.searcher.getDocSet(rangeQ, fcontext.base);
-    intersections[slot] = intersection;  // save for later
+    filters[slot] = rangeQ;
+    intersections[slot] = intersection;  // save for later  // TODO: only save if number of slots is small enough?
     int num = collect(intersection, slot);
     countAcc.incrementCount(slot, num); // TODO: roll this into collect()
   }
@@ -275,11 +282,8 @@ class FacetRangeProcessor extends FacetP
     // handle sub-facets for this bucket
     if (freq.getSubFacets().size() > 0) {
       DocSet subBase = intersections[slot];
-      if (subBase.size() == 0) return;
-      FacetContext subContext = fcontext.sub();
-      subContext.base = subBase;
       try {
-        fillBucketSubs(bucket, subContext);
+        processSubs(bucket, filters[slot], subBase);
       } finally {
         // subContext.base.decref();  // OFF-HEAP
         // subContext.base = null;  // do not modify context after creation... there may be deferred execution (i.e. streaming)

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java?rev=1676743&r1=1676742&r2=1676743&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/facet/FacetRequest.java Wed Apr 29 13:28:33 2015
@@ -19,19 +19,26 @@ package org.apache.solr.search.facet;
 
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
 import java.util.EnumSet;
+import java.util.IdentityHashMap;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.FacetParams;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.handler.component.ResponseBuilder;
 import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestInfo;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
@@ -47,7 +54,7 @@ import org.apache.solr.search.SyntaxErro
 public abstract class FacetRequest {
   protected Map<String,AggValueSource> facetStats;  // per-bucket statistics
   protected Map<String,FacetRequest> subFacets;     // list of facets
-  protected List<String> excludeFilters;
+  protected List<String> excludeTags;
   protected boolean processEmpty;
 
   public FacetRequest() {
@@ -84,6 +91,7 @@ class FacetContext {
   QueryContext qcontext;
   SolrQueryRequest req;  // TODO: replace with params?
   SolrIndexSearcher searcher;
+  Query filter;  // TODO: keep track of as a DocSet or as a Query?
   DocSet base;
   FacetContext parent;
   int flags;
@@ -92,15 +100,22 @@ class FacetContext {
     return (flags & IS_SHARD) != 0;
   }
 
-  public FacetContext sub() {
+  /**
+   * @param filter The filter for the bucket that resulted in this context/domain.  Can be null if this is the root context.
+   * @param domain The resulting set of documents for this facet.
+   */
+  public FacetContext sub(Query filter, DocSet domain) {
     FacetContext ctx = new FacetContext();
+    ctx.parent = this;
+    ctx.base = domain;
+    ctx.filter = filter;
+
+    // carry over from parent
     ctx.flags = flags;
     ctx.qcontext = qcontext;
     ctx.req = req;
     ctx.searcher = searcher;
-    ctx.base = base;
 
-    ctx.parent = this;
     return ctx;
   }
 }
@@ -121,10 +136,69 @@ class FacetProcessor<FacetRequestT exten
   }
 
   public void process() throws IOException {
+    handleDomainChanges();
+  }
+
+  protected void handleDomainChanges() throws IOException {
+    if (freq.excludeTags == null || freq.excludeTags.size() == 0) {
+      return;
+    }
+
+    // TODO: somehow remove responsebuilder dependency
+    ResponseBuilder rb = SolrRequestInfo.getRequestInfo().getResponseBuilder();
+    Map tagMap = (Map) rb.req.getContext().get("tags");
+    if (tagMap == null) {
+      // no filters were tagged
+      return;
+    }
+
+    IdentityHashMap<Query,Boolean> excludeSet = new IdentityHashMap<>();
+    for (String excludeTag : freq.excludeTags) {
+      Object olst = tagMap.get(excludeTag);
+      // tagMap is a Map<String,Collection<QParser>>, but subject to change in the future
+      if (!(olst instanceof Collection)) continue;
+      for (Object o : (Collection<?>)olst) {
+        if (!(o instanceof QParser)) continue;
+        QParser qp = (QParser)o;
+        try {
+          excludeSet.put(qp.getQuery(), Boolean.TRUE);
+        } catch (SyntaxError syntaxError) {
+          // This should not happen since we should only be retrieving a previously parsed query
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, syntaxError);
+        }
+      }
+    }
+    if (excludeSet.size() == 0) return;
+
+    List<Query> qlist = new ArrayList<>();
+
+    // add the base query
+    if (!excludeSet.containsKey(rb.getQuery())) {
+      qlist.add(rb.getQuery());
+    }
+
+    // add the filters
+    if (rb.getFilters() != null) {
+      for (Query q : rb.getFilters()) {
+        if (!excludeSet.containsKey(q)) {
+          qlist.add(q);
+        }
+      }
+    }
 
+    // now walk back up the context tree
+    // TODO: we lose parent exclusions...
+    for (FacetContext curr = fcontext; curr != null; curr = curr.parent) {
+      if (curr.filter != null) {
+        qlist.add( curr.filter );
+      }
+    }
 
+    // recompute the base domain
+    fcontext.base = fcontext.searcher.getDocSet(qlist);
   }
 
+
   public Object getResponse() {
     return null;
   }
@@ -171,8 +245,19 @@ class FacetProcessor<FacetRequestT exten
   }
 
 
-  protected void fillBucketSubs(SimpleOrderedMap<Object> response, FacetContext subContext) throws IOException {
+  protected void processSubs(SimpleOrderedMap<Object> response, Query filter, DocSet domain) throws IOException {
+
+    // TODO: what if a zero bucket has a sub-facet with an exclusion that would yield results?
+    // should we check for domain-altering exclusions, or even ask the sub-facet for
+    // its domain and then only skip it if it's 0?
+
+    if (domain == null || domain.size() == 0 && !freq.processEmpty) {
+      return;
+    }
+
     for (Map.Entry<String,FacetRequest> sub : freq.getSubFacets().entrySet()) {
+      // make a new context for each sub-facet since they can change the domain
+      FacetContext subContext = fcontext.sub(filter, domain);
       FacetProcessor subProcessor = sub.getValue().createFacetProcessor(subContext);
       subProcessor.process();
       response.add( sub.getKey(), subProcessor.getResponse() );
@@ -235,9 +320,6 @@ class FacetProcessor<FacetRequestT exten
   }
 
 
-
-
-
   public void fillBucket(SimpleOrderedMap<Object> bucket, Query q) throws IOException {
     boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
 
@@ -264,7 +346,7 @@ class FacetProcessor<FacetRequestT exten
 
     try {
       processStats(bucket, result, (int) count);
-      processSubs(bucket, result);
+      processSubs(bucket, q, result);
     } finally {
       if (result != null) {
         // result.decref(); // OFF-HEAP
@@ -273,23 +355,6 @@ class FacetProcessor<FacetRequestT exten
     }
   }
 
-
-
-
-  protected void processSubs(SimpleOrderedMap<Object> bucket, DocSet result) throws IOException {
-    // TODO: process exclusions, etc
-
-    if (result == null || result.size() == 0 && !freq.processEmpty) {
-      return;
-    }
-
-    FacetContext subContext = fcontext.sub();
-    subContext.base = result;
-
-    fillBucketSubs(bucket, subContext);
-  }
-
-
   public static DocSet getFieldMissing(SolrIndexSearcher searcher, DocSet docs, String fieldName) throws IOException {
     SchemaField sf = searcher.getSchema().getField(fieldName);
     DocSet hasVal = searcher.getDocSet(sf.getType().getRangeQuery(null, sf, null, null, false, false));
@@ -298,6 +363,14 @@ class FacetProcessor<FacetRequestT exten
     return answer;
   }
 
+  public static Query getFieldMissingQuery(SolrIndexSearcher searcher, String fieldName) throws IOException {
+    SchemaField sf = searcher.getSchema().getField(fieldName);
+    Query hasVal = sf.getType().getRangeQuery(null, sf, null, null, false, false);
+    BooleanQuery noVal = new BooleanQuery();
+    noVal.add(hasVal, BooleanClause.Occur.MUST_NOT);
+    return noVal;
+  }
+
 }
 
 
@@ -450,6 +523,14 @@ abstract class FacetParser<FacetRequestT
   }
 
 
+  protected void parseCommonParams(Object o) {
+    if (o instanceof Map) {
+      Map<String,Object> m = (Map<String,Object>)o;
+      facet.excludeTags = getStringList(m, "excludeTags");
+    }
+  }
+
+
   public String getField(Map<String,Object> args) {
     Object fieldName = args.get("field"); // TODO: pull out into defined constant
     if (fieldName == null) {
@@ -520,6 +601,20 @@ abstract class FacetParser<FacetRequestT
     return (String)o;
   }
 
+  public List<String> getStringList(Map<String,Object> args, String paramName) {
+    Object o = args.get(paramName);
+    if (o == null) {
+      return null;
+    }
+    if (o instanceof List) {
+      return (List<String>)o;
+    }
+    if (o instanceof String) {
+      return StrUtils.splitSmart((String)o, ",", true);
+    }
+
+    throw err("Expected list of string or comma separated string values.");
+  }
 
   public IndexSchema getSchema() {
     return parent.getSchema();
@@ -566,6 +661,8 @@ class FacetQueryParser extends FacetPars
 
   @Override
   public FacetQuery parse(Object arg) throws SyntaxError {
+    parseCommonParams(arg);
+
     String qstring = null;
     if (arg instanceof String) {
       // just the field name...
@@ -601,7 +698,7 @@ class FacetFieldParser extends FacetPars
   }
 
   public FacetField parse(Object arg) throws SyntaxError {
-
+    parseCommonParams(arg);
     if (arg instanceof String) {
       // just the field name...
       facet.field = (String)arg;
@@ -674,6 +771,8 @@ class FacetRangeParser extends FacetPars
   }
 
   public FacetRange parse(Object arg) throws SyntaxError {
+    parseCommonParams(arg);
+
     if (!(arg instanceof Map)) {
       throw err("Missing range facet arguments");
     }

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java?rev=1676743&r1=1676742&r2=1676743&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/facet/TestJsonFacets.java Wed Apr 29 13:28:33 2015
@@ -818,15 +818,32 @@ public class TestJsonFacets extends Solr
     );
 
 
+    ////////////////////////////////////////////////////////////////////////////////////////////
+    // multi-select / exclude tagged filters via excludeTags
+    ////////////////////////////////////////////////////////////////////////////////////////////
+
+    // nested query facets on subset
+    client.testJQ(params(p, "q", "*:*", "fq","{!tag=abc}id:(2 3)"
+            , "json.facet", "{ " +
+                " f1:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz,qaz]}}" +
+                ",f2:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:abc }}" +
+                ",f3:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:'xyz,abc,qaz' }}" +
+                ",f4:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz , abc , qaz] }}" +
+                ",f5:{query:{q:'${cat_s}:B', facet:{nj:{query:'${where_s}:NJ'}, ny:{query:'${where_s}:NY'}} , excludeTags:[xyz,qaz]}}" +    // this is repeated, but it did fail when a single context was shared among sub-facets
+                "}"
+        )
+        , "facets=={ 'count':2, " +
+            " 'f1':{'count':1, 'nj':{'count':1}, 'ny':{'count':0}}" +
+            ",'f2':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" +
+            ",'f3':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" +
+            ",'f4':{'count':3, 'nj':{'count':2}, 'ny':{'count':1}}" +
+            ",'f5':{'count':1, 'nj':{'count':1}, 'ny':{'count':0}}" +
+            "}"
+    );
+
+
 
 
-    // TODO:
-    // numdocs('query') stat (don't make a bucket... just a count)
-    // missing(field)
-    // make missing configurable in min, max, etc
-    // exclusions
-    // zeroes
-    // instead of json.facet make it facet?
   }