You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cp...@apache.org on 2016/02/02 10:53:16 UTC
[04/21] lucene-solr git commit: SOLR-8532: GraphQuery don't collect
edges at maxDepth level
SOLR-8532: GraphQuery don't collect edges at maxDepth level
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e6db8ba2
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e6db8ba2
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e6db8ba2
Branch: refs/heads/master-solr-8621
Commit: e6db8ba2149e9733b7ca4d19a90ff9a36c75df1e
Parents: c403083
Author: yonik <yo...@apache.org>
Authored: Fri Jan 29 10:59:49 2016 -0500
Committer: yonik <yo...@apache.org>
Committed: Fri Jan 29 10:59:49 2016 -0500
----------------------------------------------------------------------
solr/CHANGES.txt | 3 +
.../org/apache/solr/search/join/GraphQuery.java | 89 +++++++++++---------
.../solr/search/join/GraphTermsCollector.java | 2 +-
.../apache/solr/search/join/GraphQueryTest.java | 23 +++++
4 files changed, 78 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6db8ba2/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 5ff042f..f5c88a3 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -172,6 +172,9 @@ Optimizations
count. Also includes change to move to the next non-zero term value when selecting a segment
position. (Keith Laban, Steve Bower, Dennis Gove)
+* SOLR-8532: Optimize GraphQuery when maxDepth is set by not collecting edges at the maxDepth level.
+ (Kevin Watters via yonik)
+
Other Changes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6db8ba2/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java b/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java
index 5f9bfd2..a31568a 100644
--- a/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java
+++ b/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java
@@ -135,8 +135,8 @@ public class GraphQuery extends Query {
SolrIndexSearcher fromSearcher;
private float queryNorm = 1.0F;
private float queryWeight = 1.0F;
- int frontierSize = 0;
- public int currentDepth = 0;
+ private int frontierSize = 0;
+ private int currentDepth = -1;
private Filter filter;
private DocSet resultSet;
@@ -177,69 +177,82 @@ public class GraphQuery extends Query {
* @throws IOException - if a sub search fails... maybe other cases too! :)
*/
private DocSet getDocSet() throws IOException {
- DocSet fromSet = null;
- FixedBitSet seedResultBits = null;
// Size that the bit set needs to be.
int capacity = fromSearcher.getRawReader().maxDoc();
// The bit set to contain the results that match the query.
FixedBitSet resultBits = new FixedBitSet(capacity);
- // The measure of how deep in the graph we have gone.
- currentDepth = 0;
+ // this holds the result at each level
+ BitDocSet fromSet = null;
+ // the root docs if we return root is false
+ FixedBitSet rootBits = null;
// the initial query for the frontier for the first query
Query frontierQuery = q;
// Find all documents in this graph that are leaf nodes to speed traversal
- // TODO: speed this up in the future with HAS_FIELD type queries
- BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder();
- WildcardQuery edgeQuery = new WildcardQuery(new Term(toField, "*"));
- leafNodeQuery.add(edgeQuery, Occur.MUST_NOT);
- DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build());
+ DocSet leafNodes = resolveLeafNodes(toField);
// Start the breadth first graph traversal.
+
do {
- // Create the graph result collector for this level
- GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes);
- // traverse the level!
- fromSearcher.search(frontierQuery, graphResultCollector);
- // All edge ids on the frontier.
- BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms();
- frontierSize = collectorTerms.size();
- // The resulting doc set from the frontier.
- fromSet = graphResultCollector.getDocSet();
- if (seedResultBits == null) {
- // grab a copy of the seed bits (these are the "rootNodes")
- seedResultBits = ((BitDocSet)fromSet).getBits().clone();
+ // Increment how far we have gone in the frontier.
+ currentDepth++;
+ // if we are at the max level we don't need the graph terms collector.
+ // TODO validate that the join case works properly.
+ if (maxDepth != -1 && currentDepth >= maxDepth) {
+ // if we've reached the max depth, don't worry about collecting edges.
+ fromSet = fromSearcher.getDocSetBits(frontierQuery);
+ // explicitly the frontier size is zero now so we can break
+ frontierSize = 0;
+ } else {
+ // when we're not at the max depth level, we need to collect edges
+ // Create the graph result collector for this level
+ GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes);
+ fromSearcher.search(frontierQuery, graphResultCollector);
+ fromSet = graphResultCollector.getDocSet();
+ // All edge ids on the frontier.
+ BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms();
+ frontierSize = collectorTerms.size();
+ // The resulting doc set from the frontier.
+ FrontierQuery fq = buildFrontierQuery(collectorTerms, frontierSize);
+ if (fq == null) {
+ // in case we get null back, make sure we know we're done at this level.
+ frontierSize = 0;
+ } else {
+ frontierQuery = fq.getQuery();
+ frontierSize = fq.getFrontierSize();
+ }
}
- Integer fs = new Integer(frontierSize);
- FrontierQuery fq = buildFrontierQuery(collectorTerms, fs);
- if (fq == null) {
- // in case we get null back, make sure we know we're done at this level.
- fq = new FrontierQuery(null, 0);
+ if (currentDepth == 0 && !returnRoot) {
+ // grab a copy of the root bits but only if we need it.
+ rootBits = fromSet.getBits();
}
- frontierQuery = fq.getQuery();
- frontierSize = fq.getFrontierSize();
// Add the bits from this level to the result set.
- resultBits.or(((BitDocSet)fromSet).getBits());
- // Increment how far we have gone in the frontier.
- currentDepth++;
- // Break out if we have reached our max depth
- if (currentDepth >= maxDepth && maxDepth != -1) {
+ resultBits.or(fromSet.getBits());
+ // test if we discovered any new edges, if not , we're done.
+ if ((maxDepth != -1 && currentDepth >= maxDepth)) {
break;
}
- // test if we discovered any new edges, if not , we're done.
} while (frontierSize > 0);
// helper bit set operations on the final result set
if (!returnRoot) {
- resultBits.andNot(seedResultBits);
+ resultBits.andNot(rootBits);
}
+ // this is the final resulting filter.
BitDocSet resultSet = new BitDocSet(resultBits);
// If we only want to return leaf nodes do that here.
if (onlyLeafNodes) {
return resultSet.intersection(leafNodes);
} else {
- // create a doc set off the bits that we found.
return resultSet;
}
}
+ private DocSet resolveLeafNodes(String field) throws IOException {
+ BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder();
+ WildcardQuery edgeQuery = new WildcardQuery(new Term(field, "*"));
+ leafNodeQuery.add(edgeQuery, Occur.MUST_NOT);
+ DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build());
+ return leafNodes;
+ }
+
/** Build an automaton to represent the frontier query */
private Automaton buildAutomaton(BytesRefHash termBytesHash) {
// need top pass a sorted set of terms to the autn builder (maybe a better way to avoid this?)
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6db8ba2/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java b/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
index 6af3694..389721e 100644
--- a/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
@@ -108,7 +108,7 @@ class GraphTermsCollector extends SimpleCollector implements Collector {
numHits++;
}
- public DocSet getDocSet() {
+ public BitDocSet getDocSet() {
if (bits == null) {
// TODO: this shouldn't happen
bits = new FixedBitSet(maxDoc);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e6db8ba2/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java b/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
index 4385dcc..1f5de65 100644
--- a/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
+++ b/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
@@ -77,6 +77,29 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
qr = createRequest(g4Query);
assertQ(qr,"//*[@numFound='2']");
+ String g5Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" maxDepth=0}id:doc_8";
+ qr = createRequest(g5Query);
+ assertQ(qr,"//*[@numFound='1']");
+
+ String g6Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"true\" returnOnlyLeaf=\"false\" maxDepth=1}id:doc_8";
+ qr = createRequest(g6Query);
+ assertQ(qr,"//*[@numFound='3']");
+
+ String g7Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"false\" maxDepth=1}id:doc_8";
+ qr = createRequest(g7Query);
+ assertQ(qr,"//*[@numFound='2']");
+
+ String g8Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=\"false\" returnOnlyLeaf=\"true\" maxDepth=2}id:doc_8";
+ qr = createRequest(g8Query);
+ assertQ(qr,"//*[@numFound='1']");
+
+ String g9Query = "{!graph from=\"node_id\" to=\"edge_id\" maxDepth=1}id:doc_1";
+ qr = createRequest(g9Query);
+ assertQ(qr,"//*[@numFound='2']");
+
+ String g10Query = "{!graph from=\"node_id\" to=\"edge_id\" returnRoot=false maxDepth=1}id:doc_1";
+ qr = createRequest(g10Query);
+ assertQ(qr,"//*[@numFound='1']");
}
private SolrQueryRequest createRequest(String query) {