You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cp...@apache.org on 2016/02/02 10:53:16 UTC
[04/21] lucene-solr git commit: SOLR-8532: GraphQuery don't collect edges at maxDepth level

SOLR-8532: GraphQuery don't collect edges at maxDepth level


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e6db8ba2
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e6db8ba2
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e6db8ba2

Branch: refs/heads/master-solr-8621
Commit: e6db8ba2149e9733b7ca4d19a90ff9a36c75df1e
Parents: c403083
Author: yonik <yo...@apache.org>
Authored: Fri Jan 29 10:59:49 2016 -0500
Committer: yonik <yo...@apache.org>
Committed: Fri Jan 29 10:59:49 2016 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 +
 .../org/apache/solr/search/join/GraphQuery.java | 89 +++++++++++---------
 .../solr/search/join/GraphTermsCollector.java   |  2 +-
 .../apache/solr/search/join/GraphQueryTest.java | 23 +++++
 4 files changed, 78 insertions(+), 39 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6db8ba2/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 5ff042f..f5c88a3 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -172,6 +172,9 @@ Optimizations
   count. Also includes change to move to the next non-zero term value when selecting a segment
   position. (Keith Laban, Steve Bower, Dennis Gove)
 
+* SOLR-8532: Optimize GraphQuery when maxDepth is set by not collecting edges at the maxDepth level.
+  (Kevin Watters via yonik)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6db8ba2/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java b/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java
index 5f9bfd2..a31568a 100644
--- a/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java
+++ b/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java
@@ -135,8 +135,8 @@ public class GraphQuery extends Query {
     SolrIndexSearcher fromSearcher;
     private float queryNorm = 1.0F;
     private float queryWeight = 1.0F; 
-    int frontierSize = 0;
-    public int currentDepth = 0;
+    private int frontierSize = 0;
+    private int currentDepth = -1;
     private Filter filter;
     private DocSet resultSet;
     
@@ -177,69 +177,82 @@ public class GraphQuery extends Query {
      * @throws IOException - if a sub search fails... maybe other cases too! :)
      */
     private DocSet getDocSet() throws IOException {
-      DocSet fromSet = null;
-      FixedBitSet seedResultBits = null;
       // Size that the bit set needs to be.
       int capacity = fromSearcher.getRawReader().maxDoc();
       // The bit set to contain the results that match the query.
       FixedBitSet resultBits = new FixedBitSet(capacity);
-      // The measure of how deep in the graph we have gone.
-      currentDepth = 0;
+      // this holds the result at each level
+      BitDocSet fromSet = null;
+      // the root docs if we return root is false
+      FixedBitSet rootBits = null;
       // the initial query for the frontier for the first query
       Query frontierQuery = q;
       // Find all documents in this graph that are leaf nodes to speed traversal
-      // TODO: speed this up in the future with HAS_FIELD type queries
-      BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder();
-      WildcardQuery edgeQuery = new WildcardQuery(new Term(toField, "*"));
-      leafNodeQuery.add(edgeQuery, Occur.MUST_NOT);
-      DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build());
+      DocSet leafNodes = resolveLeafNodes(toField);
       // Start the breadth first graph traversal.
+      
       do {
-        // Create the graph result collector for this level
-        GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes);
-        // traverse the level!
-        fromSearcher.search(frontierQuery, graphResultCollector);
-        // All edge ids on the frontier.
-        BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms();
-        frontierSize = collectorTerms.size();
-        // The resulting doc set from the frontier.
-        fromSet = graphResultCollector.getDocSet();
-        if (seedResultBits == null) {
-          // grab a copy of the seed bits  (these are the "rootNodes")
-          seedResultBits = ((BitDocSet)fromSet).getBits().clone();
+        // Increment how far we have gone in the frontier.
+        currentDepth++;
+        // if we are at the max level we don't need the graph terms collector.
+        // TODO validate that the join case works properly.
+        if (maxDepth != -1 && currentDepth >= maxDepth) {
+          // if we've reached the max depth, don't worry about collecting edges.
+          fromSet = fromSearcher.getDocSetBits(frontierQuery);
+          // explicitly the frontier size is zero now so we can break
+          frontierSize = 0;
+        } else {
+          // when we're not at the max depth level, we need to collect edges          
+          // Create the graph result collector for this level
+          GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes);
+          fromSearcher.search(frontierQuery, graphResultCollector);
+          fromSet = graphResultCollector.getDocSet();
+          // All edge ids on the frontier.
+          BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms();
+          frontierSize = collectorTerms.size();
+          // The resulting doc set from the frontier.
+          FrontierQuery fq = buildFrontierQuery(collectorTerms, frontierSize);
+          if (fq == null) {
+            // in case we get null back, make sure we know we're done at this level.
+            frontierSize = 0;
+          } else {
+            frontierQuery = fq.getQuery();
+            frontierSize = fq.getFrontierSize();
+          }
         }
-        Integer fs = new Integer(frontierSize);
-        FrontierQuery fq = buildFrontierQuery(collectorTerms, fs);
-        if (fq == null) {
-          // in case we get null back, make sure we know we're done at this level.
-          fq = new FrontierQuery(null, 0);
+        if (currentDepth == 0 && !returnRoot) {
+          // grab a copy of the root bits but only if we need it.
+          rootBits = fromSet.getBits();
         }
-        frontierQuery = fq.getQuery();
-        frontierSize = fq.getFrontierSize();
         // Add the bits from this level to the result set.
-        resultBits.or(((BitDocSet)fromSet).getBits());
-        // Increment how far we have gone in the frontier.
-        currentDepth++;
-        // Break out if we have reached our max depth
-        if (currentDepth >= maxDepth && maxDepth != -1) {
+        resultBits.or(fromSet.getBits());
+        // test if we discovered any new edges, if not , we're done.
+        if ((maxDepth != -1 && currentDepth >= maxDepth)) {
           break;
         }
-        // test if we discovered any new edges, if not , we're done.
       } while (frontierSize > 0);
       // helper bit set operations on the final result set
       if (!returnRoot) {
-        resultBits.andNot(seedResultBits);
+        resultBits.andNot(rootBits);
       }
+      // this is the final resulting filter.
       BitDocSet resultSet = new BitDocSet(resultBits);
       // If we only want to return leaf nodes do that here.
       if (onlyLeafNodes) {
         return resultSet.intersection(leafNodes);
       } else {
-        // create a doc set off the bits that we found.
         return resultSet;
       }
     }
     
+    private DocSet resolveLeafNodes(String field) throws IOException {
+      BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder();
+      WildcardQuery edgeQuery = new WildcardQuery(new Term(field, "*"));
+      leafNodeQuery.add(edgeQuery, Occur.MUST_NOT);
+      DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build());
+      return leafNodes;
+    }
+    
     /** Build an automaton to represent the frontier query */
     private Automaton buildAutomaton(BytesRefHash termBytesHash) {
       // need top pass a sorted set of terms to the autn builder (maybe a better way to avoid this?)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6db8ba2/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java b/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
index 6af3694..389721e 100644
--- a/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
@@ -108,7 +108,7 @@ class GraphTermsCollector extends SimpleCollector implements Collector {
     numHits++;
   }
   
-  public DocSet getDocSet() {
+  public BitDocSet getDocSet() {
     if (bits == null) {
       // TODO: this shouldn't happen
       bits = new FixedBitSet(maxDoc);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6db8ba2/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java b/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
index 4385dcc..1f5de65 100644
--- a/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
+++ b/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
@@ -77,6 +77,29 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
     qr = createRequest(g4Query);    
     assertQ(qr,"//*[@numFound='2']");
     
+    String g5Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" maxDepth=0}id:doc_8";
+    qr = createRequest(g5Query);    
+    assertQ(qr,"//*[@numFound='1']");  
+
+    String g6Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" maxDepth=1}id:doc_8";
+    qr = createRequest(g6Query);    
+    assertQ(qr,"//*[@numFound='3']");
+    
+    String g7Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"false\" maxDepth=1}id:doc_8";
+    qr = createRequest(g7Query);    
+    assertQ(qr,"//*[@numFound='2']");
+
+    String g8Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"true\" maxDepth=2}id:doc_8";
+    qr = createRequest(g8Query);    
+    assertQ(qr,"//*[@numFound='1']");
+
+    String g9Query = "{!graph from=\"node_id\" to=\"edge_id\" maxDepth=1}id:doc_1";
+    qr = createRequest(g9Query);    
+    assertQ(qr,"//*[@numFound='2']");
+    
+    String g10Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=false maxDepth=1}id:doc_1";
+    qr = createRequest(g10Query);    
+    assertQ(qr,"//*[@numFound='1']");
   }
 
   private SolrQueryRequest createRequest(String query) {