You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2011/04/27 03:42:06 UTC
svn commit: r1096978 - in /lucene/dev/trunk:
lucene/src/java/org/apache/lucene/index/ solr/
solr/src/java/org/apache/solr/handler/component/
solr/src/java/org/apache/solr/request/
solr/src/java/org/apache/solr/search/ solr/src/test-framework/org/apache...
Author: yonik
Date: Wed Apr 27 01:42:05 2011
New Revision: 1096978
URL: http://svn.apache.org/viewvc?rev=1096978&view=rev
Log:
SOLR-2272: Pseudo-join
Added:
lucene/dev/trunk/solr/src/java/org/apache/solr/search/JoinQParserPlugin.java (with props)
lucene/dev/trunk/solr/src/test/org/apache/solr/TestJoin.java (with props)
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java
lucene/dev/trunk/solr/src/java/org/apache/solr/request/SimpleFacets.java
lucene/dev/trunk/solr/src/java/org/apache/solr/request/SolrRequestInfo.java
lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/BitDocSet.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/DocSet.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/DocSlice.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/HashDocSet.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/QParserPlugin.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
lucene/dev/trunk/solr/src/java/org/apache/solr/search/SortedIntDocSet.java
lucene/dev/trunk/solr/src/test-framework/org/apache/solr/SolrTestCaseJ4.java
lucene/dev/trunk/solr/src/test/org/apache/solr/client/solrj/MultiCoreExampleTestBase.java
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java Wed Apr 27 01:42:05 2011
@@ -129,6 +129,8 @@ public class DocTermOrds {
protected BytesRef prefix;
protected int ordBase;
+ protected DocsEnum docsEnum; //used while uninverting
+
public long ramUsedInBytes() {
// can cache the mem size since it shouldn't change
if (memsz!=0) return memsz;
@@ -270,7 +272,7 @@ public class DocTermOrds {
// frequent terms ahead of time.
int termNum = 0;
- DocsEnum docsEnum = null;
+ docsEnum = null;
// Loop begins with te positioned to first term (we call
// seek above):
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Apr 27 01:42:05 2011
@@ -137,6 +137,13 @@ New Features
* SOLR-2383: /browse improvements: generalize range and date facet display
(Jan Høydahl via yonik)
+* SOLR-2272: Pseudo-join queries / filters. Examples:
+ To restrict to the set of parents with at least one blue-eyed child:
+ fq={!join from=parent to=name}eyes:blue
+ To restrict to the set of children with at least one blue-eyed parent:
+ fq={!join from=name to=parent}eyes:blue
+ (yonik)
+
Optimizations
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/handler/component/ResponseBuilder.java Wed Apr 27 01:42:05 2011
@@ -163,6 +163,25 @@ public class ResponseBuilder
debugInfo.add( name, val );
}
+ /**
+  * Adds a debug value at a nested location inside the debug info, creating
+  * the debug info and any missing intermediate levels on demand.
+  *
+  * @param val  the value to record
+  * @param path names of the nested entries to descend through; the last
+  *             element is the key under which <code>val</code> is added.
+  *             Must contain at least one element.
+  */
+ public void addDebug(Object val, String... path) {
+   if( debugInfo == null ) {
+     debugInfo = new SimpleOrderedMap<Object>();
+   }
+
+   NamedList<Object> target = debugInfo;
+   for (int i=0; i<path.length-1; i++) {
+     String elem = path[i];
+     // Look the element up in the level we have descended to, not in the
+     // root: resolving against debugInfo breaks paths deeper than two levels.
+     @SuppressWarnings("unchecked")
+     NamedList<Object> newTarget = (NamedList<Object>)target.get(elem);
+     if (newTarget == null) {
+       newTarget = new SimpleOrderedMap<Object>();
+       target.add(elem, newTarget);
+     }
+     target = newTarget;
+   }
+
+   target.add(path[path.length-1], val);
+ }
+
//-------------------------------------------------------------------------
//-------------------------------------------------------------------------
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/request/SimpleFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/request/SimpleFacets.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/request/SimpleFacets.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/request/SimpleFacets.java Wed Apr 27 01:42:05 2011
@@ -21,6 +21,7 @@ import org.apache.lucene.index.*;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.packed.Direct16;
import org.apache.lucene.util.packed.Direct32;
import org.apache.lucene.util.packed.Direct8;
@@ -682,14 +683,15 @@ public class SimpleFacets {
if (deState==null) {
deState = new SolrIndexSearcher.DocsEnumState();
+ deState.fieldName = StringHelper.intern(field);
deState.deletedDocs = MultiFields.getDeletedDocs(r);
deState.termsEnum = termsEnum;
- deState.reuse = docsEnum;
+ deState.docsEnum = docsEnum;
}
- c = searcher.numDocs(new TermQuery(t), docs, deState);
+ c = searcher.numDocs(docs, deState);
- docsEnum = deState.reuse;
+ docsEnum = deState.docsEnum;
} else {
// iterate over TermDocs to calculate the intersection
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/request/SolrRequestInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/request/SolrRequestInfo.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/request/SolrRequestInfo.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/request/SolrRequestInfo.java Wed Apr 27 01:42:05 2011
@@ -17,11 +17,15 @@
package org.apache.solr.request;
+import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.response.SolrQueryResponse;
+import java.io.Closeable;
import java.util.Date;
+import java.util.LinkedList;
+import java.util.List;
public class SolrRequestInfo {
@@ -31,6 +35,8 @@ public class SolrRequestInfo {
protected SolrQueryResponse rsp;
protected Date now;
protected ResponseBuilder rb;
+ protected List<Closeable> closeHooks;
+
public static SolrRequestInfo getRequestInfo() {
return threadLocal.get();
@@ -48,7 +54,20 @@ public class SolrRequestInfo {
}
public static void clearRequestInfo() {
- threadLocal.remove();
+ try {
+ SolrRequestInfo info = threadLocal.get();
+ if (info != null && info.closeHooks != null) {
+ for (Closeable hook : info.closeHooks) {
+ try {
+ hook.close();
+ } catch (Throwable throwable) {
+ SolrException.log(SolrCore.log, "Exception during close hook", throwable);
+ }
+ }
+ }
+ } finally {
+ threadLocal.remove();
+ }
}
public SolrRequestInfo(SolrQueryRequest req, SolrQueryResponse rsp) {
@@ -88,4 +107,14 @@ public class SolrRequestInfo {
public void setResponseBuilder(ResponseBuilder rb) {
this.rb = rb;
}
+
+ /**
+  * Registers a hook on this request info; the list of hooks is created
+  * lazily on first use.
+  *
+  * @param hook the resource to remember for closing
+  */
+ public void addCloseHook(Closeable hook) {
+   // is this better here, or on SolrQueryRequest?
+   synchronized (this) {
+     List<Closeable> hooks = closeHooks;
+     if (hooks == null) {
+       hooks = new LinkedList<Closeable>();
+       closeHooks = hooks;
+     }
+     hooks.add(hook);
+   }
+ }
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/request/UnInvertedField.java Wed Apr 27 01:42:05 2011
@@ -23,6 +23,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.util.StringHelper;
import org.apache.noggit.CharArr;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.NamedList;
@@ -122,11 +123,15 @@ public class UnInvertedField extends Doc
if (deState == null) {
deState = new SolrIndexSearcher.DocsEnumState();
- deState.termsEnum = te;
- }
-
- maxTermCounts[termNum] = searcher.getDocSet(new TermQuery(new Term(field, topTerm.term)), deState).size();
- //System.out.println(" big term termNum=" + termNum + " term=" + topTerm.term.utf8ToString() + " size=" + maxTermCounts[termNum] + " dF=" + te.docFreq());
+ deState.fieldName = StringHelper.intern(field);
+ // deState.termsEnum = te.tenum;
+ deState.termsEnum = te; // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail?
+ deState.docsEnum = docsEnum;
+ deState.minSetSizeCached = maxTermDocFreq;
+ }
+ docsEnum = deState.docsEnum;
+ DocSet set = searcher.getDocSet(deState);
+ maxTermCounts[termNum] = set.size();
}
}
@@ -158,10 +163,10 @@ public class UnInvertedField extends Doc
super(field,
// threshold, over which we use set intersections instead of counting
// to (1) save memory, and (2) speed up faceting.
- // Add 1 for testing purposes so that there will always be some terms under
+ // Add 2 for testing purposes so that there will always be some terms under
// the threshold even when the index is very
// small.
- searcher.maxDoc()/20 + 1,
+ searcher.maxDoc()/20 + 2,
DEFAULT_INDEX_INTERVAL_BITS);
//System.out.println("maxTermDocFreq=" + maxTermDocFreq + " maxDoc=" + searcher.maxDoc());
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/BitDocSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/BitDocSet.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/BitDocSet.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/BitDocSet.java Wed Apr 27 01:42:05 2011
@@ -161,6 +161,16 @@ public class BitDocSet extends DocSetBas
}
@Override
+ public boolean intersects(DocSet other) {
+ if (other instanceof BitDocSet) {
+ return bits.intersects(((BitDocSet)other).bits);
+ } else {
+ // they had better not call us back!
+ return other.intersects(this);
+ }
+ }
+
+ @Override
public int unionSize(DocSet other) {
if (other instanceof BitDocSet) {
// if we don't know our current size, this is faster than
@@ -184,6 +194,11 @@ public class BitDocSet extends DocSetBas
}
@Override
+ public void setBitsOn(OpenBitSet target) { // marks the docs of this set on the caller-supplied target
+ target.union(bits); // merges our backing bits into target in a single call
+ }
+
+ @Override
public DocSet andNot(DocSet other) {
OpenBitSet newbits = (OpenBitSet)(bits.clone());
if (other instanceof BitDocSet) {
@@ -211,4 +226,9 @@ public class BitDocSet extends DocSetBas
public long memSize() {
return (bits.getBits().length << 3) + 16;
}
+
+ @Override
+ protected BitDocSet clone() {
+   // copy the backing bits so the clone does not share them with this set
+   OpenBitSet bitsCopy = (OpenBitSet)bits.clone();
+   return new BitDocSet(bitsCopy, size);
+ }
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/DocSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/DocSet.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/DocSet.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/DocSet.java Wed Apr 27 01:42:05 2011
@@ -115,6 +115,9 @@ public interface DocSet /* extends Colle
*/
public int intersectionSize(DocSet other);
+ /** Returns true if these sets have any elements in common */
+ public boolean intersects(DocSet other);
+
/**
* Returns the union of this set with another set. Neither set is modified - a new DocSet is
* created and returned.
@@ -146,6 +149,14 @@ public interface DocSet /* extends Colle
* methods will be invoked with.
*/
public Filter getTopFilter();
+
+ /**
+ * Takes the docs from this set and sets those bits on the target OpenBitSet.
+ * The target should be sized large enough to accommodate all of the documents before calling this method.
+ */
+ public void setBitsOn(OpenBitSet target);
+
+ public static DocSet EMPTY = new SortedIntDocSet(new int[0], 0);
}
/** A base class that may be usefull for implementing DocSets */
@@ -213,6 +224,17 @@ abstract class DocSetBase implements Doc
return new BitDocSet(newbits);
}
+ public boolean intersects(DocSet other) {
+   if (other instanceof BitDocSet) {
+     // less efficient way: get the intersection size
+     return intersectionSize(other) > 0;
+   }
+   // intersects is overridden in the smaller DocSets to be more
+   // efficient, so dispatch off of the other set instead.
+   return other.intersects(this);
+ }
+
+
public DocSet union(DocSet other) {
OpenBitSet newbits = (OpenBitSet)(this.getBits().clone());
newbits.or(other.getBits());
@@ -295,6 +317,14 @@ abstract class DocSetBase implements Doc
}
};
}
+
+ public void setBitsOn(OpenBitSet target) {
+   // generic fallback: walk our documents and set each bit individually
+   for (DocIterator docs = iterator(); docs.hasNext(); ) {
+     target.fastSet(docs.nextDoc());
+   }
+ }
+
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/DocSlice.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/DocSlice.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/DocSlice.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/DocSlice.java Wed Apr 27 01:42:05 2011
@@ -17,6 +17,8 @@
package org.apache.solr.search;
+import java.util.Arrays;
+
/**
* <code>DocSlice</code> implements DocList as an array of docids and optional scores.
*
@@ -141,4 +143,22 @@ public class DocSlice extends DocSetBase
HashDocSet h = new HashDocSet(docs,offset,len);
return h.intersectionSize(other);
}
+
+ @Override
+ public boolean intersects(DocSet other) {
+ if (other instanceof SortedIntDocSet || other instanceof HashDocSet) {
+ return other.intersects(this);
+ }
+ HashDocSet h = new HashDocSet(docs,offset,len);
+ return h.intersects(other);
+ }
+
+ /**
+  * Returns a shallow copy of this slice.
+  *
+  * @return the copy produced by <code>super.clone()</code>, or
+  *         <code>null</code> if cloning is not supported
+  */
+ @Override
+ protected DocSlice clone() {
+   try {
+     // DocSlice is not currently mutable, so a shallow copy is sufficient.
+     // The copy must actually be returned: it used to be assigned to an
+     // unused local, which made this method always return null.
+     return (DocSlice) super.clone();
+   } catch (CloneNotSupportedException ignored) {
+     // NOTE(review): kept the historical null result for this path; confirm
+     // whether the superclass chain is Cloneable so this can become an error.
+     return null;
+   }
+ }
}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/HashDocSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/HashDocSet.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/HashDocSet.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/HashDocSet.java Wed Apr 27 01:42:05 2011
@@ -48,6 +48,12 @@ public final class HashDocSet extends Do
private final int mask;
+ public HashDocSet(HashDocSet set) { // copy constructor
+ this.table = set.table.clone(); // private copy of the table so the two sets do not share the array
+ this.size = set.size;
+ this.mask = set.mask;
+ }
+
/** Create a HashDocSet from a list of *unique* ids */
public HashDocSet(int[] docs, int offset, int len) {
this(docs, offset, len, DEFAULT_INVERSE_LOAD_FACTOR);
@@ -207,6 +213,31 @@ public final class HashDocSet extends Do
}
+ @Override
+ public boolean intersects(DocSet other) {
+   if (!(other instanceof HashDocSet)) {
+     // unknown implementation: probe it with each of our ids
+     // (negative table entries are skipped)
+     for (int id : table) {
+       if (id >= 0 && other.exists(id)) {
+         return true;
+       }
+     }
+     return false;
+   }
+
+   // two hash sets: iterate over the smallest doc set and probe the
+   // other one, for the most efficient intersection.
+   final HashDocSet that = (HashDocSet)other;
+   final boolean thisIsSmaller = size() <= that.size();
+   final HashDocSet iterated = thisIsSmaller ? this : that;
+   final HashDocSet probed = thisIsSmaller ? that : this;
+
+   for (int id : iterated.table) {
+     if (id >= 0 && probed.exists(id)) {
+       return true;
+     }
+   }
+   return false;
+ }
@Override
public DocSet andNot(DocSet other) {
@@ -249,6 +280,10 @@ public final class HashDocSet extends Do
}
}
+ @Override
+ protected HashDocSet clone() { // copies via the copy constructor rather than Object.clone()
+ return new HashDocSet(this);
+ }
// don't implement andNotSize() and unionSize() on purpose... they are implemented
// in BaseDocSet in terms of intersectionSize().
Added: lucene/dev/trunk/solr/src/java/org/apache/solr/search/JoinQParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/JoinQParserPlugin.java?rev=1096978&view=auto
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/JoinQParserPlugin.java (added)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/JoinQParserPlugin.java Wed Apr 27 01:42:05 2011
@@ -0,0 +1,572 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import org.apache.lucene.index.*;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.search.*;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.OpenBitSet;
+import org.apache.lucene.util.StringHelper;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.core.CoreContainer;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.component.ResponseBuilder;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestInfo;
+import org.apache.solr.schema.TrieField;
+import org.apache.solr.util.RefCounted;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Set;
+
+
+/**
+ * Query parser plugin that builds a {@link JoinQuery} from the local params
+ * <code>from</code>, <code>to</code> and <code>fromIndex</code>; the
+ * <code>v</code> local param is parsed as the sub-query, using the "lucene"
+ * parser by default.
+ */
+public class JoinQParserPlugin extends QParserPlugin {
+  public static String NAME = "join";
+
+  /** No configuration is read. */
+  public void init(NamedList args) {
+  }
+
+  public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
+    return new QParser(qstr, localParams, params, req) {
+      public Query parse() throws ParseException {
+        // read the join parameters first, then parse the nested query
+        final String from = getParam("from");
+        final String fromCoreName = getParam("fromIndex");
+        final String to = getParam("to");
+        final String subQueryString = localParams.get("v");
+
+        final QParser subParser = subQuery(subQueryString, "lucene");
+        final Query subQuery = subParser.getQuery();
+        return new JoinQuery(from, to, fromCoreName, subQuery);
+      }
+    };
+  }
+}
+
+
+class JoinQuery extends Query {
+ String fromField;
+ String toField;
+ String fromIndex;
+ Query q;
+
+ public JoinQuery(String fromField, String toField, String fromIndex, Query subQuery) { // stores the join parameters as given; nothing is validated here
+ this.fromField = fromField;
+ this.toField = toField;
+ this.fromIndex = fromIndex; // may be null (the weight constructor checks for that)
+ this.q = subQuery;
+ }
+
+ public Query getQuery() { return q; } // the wrapped sub-query passed to the constructor (or its rewritten form)
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+   Query rewritten = q.rewrite(reader);
+   if (rewritten != q) {
+     // the sub-query changed: return a copy wrapping the rewritten form
+     JoinQuery copy = (JoinQuery)this.clone();
+     copy.q = rewritten;
+     return copy;
+   }
+   // nothing changed, so this query is already fully rewritten
+   return this;
+ }
+
+ @Override
+ public void extractTerms(Set terms) { // delegates entirely to the wrapped sub-query
+ q.extractTerms(terms);
+ }
+
+ public Weight createWeight(IndexSearcher searcher) throws IOException { // searcher must be a SolrIndexSearcher
+ return new JoinQueryWeight((SolrIndexSearcher)searcher); // any other IndexSearcher fails this cast
+ }
+
+ private class JoinQueryWeight extends Weight {
+ SolrIndexSearcher fromSearcher;
+ RefCounted<SolrIndexSearcher> fromRef;
+ SolrIndexSearcher toSearcher;
+ private Similarity similarity;
+ private float queryNorm;
+ private float queryWeight;
+ ResponseBuilder rb;
+
+ /**
+  * Resolves the searchers used for the "from" and "to" sides of the join.
+  * Without a <code>fromIndex</code> both sides use the given searcher.
+  * With one, the named core is looked up in the core container and, unless
+  * it is the core of the current request, a searcher is obtained from it.
+  * Everything acquired here is released through a close hook on the current
+  * {@link SolrRequestInfo}.
+  *
+  * @param searcher the searcher of the index being queried (the "to" side)
+  * @throws SolrException with BAD_REQUEST if a cross-core join is requested
+  *         without a SolrRequestInfo, or the named core does not exist
+  */
+ public JoinQueryWeight(SolrIndexSearcher searcher) throws IOException {
+   // default: join within the index being searched
+   this.fromSearcher = searcher;
+   SolrRequestInfo info = SolrRequestInfo.getRequestInfo();
+   if (info != null) {
+     rb = info.getResponseBuilder();
+   }
+
+   if (fromIndex != null) {
+     if (info == null) {
+       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Cross-core join must have SolrRequestInfo");
+     }
+
+     CoreContainer container = searcher.getCore().getCoreDescriptor().getCoreContainer();
+     final SolrCore fromCore = container.getCore(fromIndex);
+
+     if (fromCore == null) {
+       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Cross-core join: no such core " + fromIndex);
+     }
+
+     // Register the cleanup before acquiring anything else, so the core
+     // obtained above is closed even if getSearcher() below fails.
+     // The searcher reference (if any) is released first, then the core.
+     info.addCloseHook(new Closeable() {
+       @Override
+       public void close() throws IOException {
+         try {
+           if (fromRef != null) {
+             fromRef.decref();
+           }
+         } finally {
+           fromCore.close();
+         }
+       }
+     });
+
+     if (info.getReq().getCore() != fromCore) {
+       // This could block if there is a static warming query with a join in it, and if useColdSearcher is true.
+       // Deadlock could result if two cores both had useColdSearcher and had joins that used each other.
+       // This would be very predictable though (should happen every time if misconfigured)
+       fromRef = fromCore.getSearcher(false, true, null);
+
+       // be careful not to do anything with this searcher that requires the thread local
+       // SolrRequestInfo in a manner that requires the core in the request to match
+       fromSearcher = fromRef.get();
+     }
+     // else: this is the same core, so keep using the searcher passed in...
+     // otherwise we could be warming and get an older searcher from the core.
+   }
+   this.toSearcher = searcher;
+ }
+
+ public Query getQuery() { // the enclosing JoinQuery this weight was created for
+ return JoinQuery.this;
+ }
+
+ public float getValue() { // the weight's value is simply the boost of the enclosing query
+ return getBoost();
+ }
+
+ @Override
+ public float sumOfSquaredWeights() throws IOException {
+ queryWeight = getBoost(); // side effect: (re)initializes queryWeight from the boost
+ return queryWeight * queryWeight;
+ }
+
+ @Override
+ public void normalize(float norm) { // remembers the norm and scales queryWeight by it
+ this.queryNorm = norm;
+ queryWeight *= this.queryNorm; // multiplies the value last set by sumOfSquaredWeights()
+ }
+
+ DocSet resultSet;
+ Filter filter;
+
+
+
+ /**
+  * Returns a scorer over the join result for the given reader context.
+  * The join itself is computed once, on the first call, and its top-level
+  * filter is kept for later calls. When the response builder has debugging
+  * enabled, timing and the join counters are added to the debug output.
+  */
+ @Override
+ public Scorer scorer(IndexReader.AtomicReaderContext context, ScorerContext scorerContext) throws IOException {
+   if (filter == null) {
+     if (rb != null && rb.isDebug()) {
+       long start = System.currentTimeMillis();
+       resultSet = getDocSet();
+       long elapsed = System.currentTimeMillis() - start;
+
+       SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<Object>();
+       dbg.add("time", elapsed);
+       dbg.add("fromSetSize", fromSetSize); // the input
+       dbg.add("toSetSize", resultSet.size()); // the output
+
+       dbg.add("fromTermCount", fromTermCount);
+       dbg.add("fromTermTotalDf", fromTermTotalDf);
+       dbg.add("fromTermDirectCount", fromTermDirectCount);
+       dbg.add("fromTermHits", fromTermHits);
+       dbg.add("fromTermHitsTotalDf", fromTermHitsTotalDf);
+       dbg.add("toTermHits", toTermHits);
+       dbg.add("toTermHitsTotalDf", toTermHitsTotalDf);
+       dbg.add("toTermDirectCount", toTermDirectCount);
+       dbg.add("smallSetsDeferred", smallSetsDeferred);
+       dbg.add("toSetDocsAdded", resultListDocs);
+
+       // TODO: perhaps synchronize addDebug in the future...
+       rb.addDebug(dbg, "join", JoinQuery.this.toString());
+     } else {
+       resultSet = getDocSet();
+     }
+
+     filter = resultSet.getTopFilter();
+   }
+
+   DocIdSet readerSet = filter.getDocIdSet(context);
+   DocIdSetIterator docs = (readerSet == null ? DocIdSet.EMPTY_DOCIDSET : readerSet).iterator();
+   return new JoinScorer(this, docs);
+ }
+
+
+ int fromSetSize; // number of docs in the fromSet (that match the from query)
+ long resultListDocs; // total number of docs collected
+ int fromTermCount;
+ long fromTermTotalDf;
+ int fromTermDirectCount; // number of fromTerms that were too small to use the filter cache
+ int fromTermHits; // number of fromTerms that intersected the from query
+ long fromTermHitsTotalDf; // sum of the df of the matching terms
+ int toTermHits; // num of intersecting from terms that match a term in the to field
+ long toTermHitsTotalDf; // sum of the df for the toTermHits
+ int toTermDirectCount; // number of toTerms that we set directly on a bitset rather than doing set intersections
+ int smallSetsDeferred; // number of small sets collected to be used later to intersect w/ bitset or create another small set
+
+
+ /**
+  * Computes the set of "to"-side documents selected by the join.
+  * <p>
+  * The documents matching the sub-query are fetched from the "from"
+  * searcher. Then every term of <code>fromField</code> is visited; for each
+  * term whose postings intersect that set, the same term is looked up in
+  * <code>toField</code> and its documents are added to the result. Frequent
+  * terms go through the searcher's <code>getDocSet</code>, rare terms are
+  * read directly from the postings. The debug counters of this weight are
+  * updated along the way.
+  *
+  * @return the joined documents; {@link DocSet#EMPTY} if either index has no
+  *         fields, either field has no terms, or nothing matched
+  */
+ public DocSet getDocSet() throws IOException {
+   OpenBitSet resultBits = null;
+
+   // minimum docFreq to use the cache
+   int minDocFreqFrom = Math.max(5, fromSearcher.maxDoc() >> 13);
+   int minDocFreqTo = Math.max(5, toSearcher.maxDoc() >> 13);
+
+   // use a smaller size than normal since we will need to sort and dedup the results
+   int maxSortedIntSize = Math.max(10, toSearcher.maxDoc() >> 10);
+
+   DocSet fromSet = fromSearcher.getDocSet(q);
+   fromSetSize = fromSet.size();
+
+   List<DocSet> resultList = new ArrayList<DocSet>(10);
+
+   // make sure we have a set that is fast for random access, if we will use it for that
+   DocSet fastForRandomSet = fromSet;
+   if (minDocFreqFrom>0 && fromSet instanceof SortedIntDocSet) {
+     SortedIntDocSet sset = (SortedIntDocSet)fromSet;
+     fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
+   }
+
+   Fields fromFields = MultiFields.getFields(fromSearcher.getIndexReader());
+   Fields toFields = fromSearcher==toSearcher ? fromFields : MultiFields.getFields(toSearcher.getIndexReader());
+   // either side may have no fields at all; checking only the from side
+   // would let toFields.terms() below fail with a NullPointerException
+   if (fromFields == null || toFields == null) return DocSet.EMPTY;
+   Terms terms = fromFields.terms(fromField);
+   Terms toTerms = toFields.terms(toField);
+   if (terms == null || toTerms==null) return DocSet.EMPTY;
+   String prefixStr = TrieField.getMainValuePrefix(fromSearcher.getSchema().getFieldType(fromField));
+   BytesRef prefix = prefixStr == null ? null : new BytesRef(prefixStr);
+
+   BytesRef term = null;
+   TermsEnum termsEnum = terms.iterator();
+   TermsEnum toTermsEnum = toTerms.iterator();
+
+   // position on the first term to visit (the first one at or after the prefix, if any)
+   if (prefix == null) {
+     term = termsEnum.next();
+   } else {
+     if (termsEnum.seek(prefix, true) != TermsEnum.SeekStatus.END) {
+       term = termsEnum.term();
+     }
+   }
+
+   Bits fromDeletedDocs = MultiFields.getDeletedDocs(fromSearcher.getIndexReader());
+   Bits toDeletedDocs = fromSearcher == toSearcher ? fromDeletedDocs : MultiFields.getDeletedDocs(toSearcher.getIndexReader());
+
+   SolrIndexSearcher.DocsEnumState fromDeState = new SolrIndexSearcher.DocsEnumState();
+   fromDeState.fieldName = StringHelper.intern(fromField);
+   fromDeState.deletedDocs = fromDeletedDocs;
+   fromDeState.termsEnum = termsEnum;
+   fromDeState.docsEnum = null;
+   fromDeState.minSetSizeCached = minDocFreqFrom;
+
+   SolrIndexSearcher.DocsEnumState toDeState = new SolrIndexSearcher.DocsEnumState();
+   toDeState.fieldName = StringHelper.intern(toField);
+   toDeState.deletedDocs = toDeletedDocs;
+   toDeState.termsEnum = toTermsEnum;
+   toDeState.docsEnum = null;
+   toDeState.minSetSizeCached = minDocFreqTo;
+
+   while (term != null) {
+     if (prefix != null && !term.startsWith(prefix))
+       break;
+
+     fromTermCount++;
+
+     boolean intersects = false;
+     int freq = termsEnum.docFreq();
+     // accumulate the document frequency itself, not a count of terms
+     fromTermTotalDf += freq;
+
+     if (freq < minDocFreqFrom) {
+       fromTermDirectCount++;
+       // OK to skip deletedDocs, since we check for intersection with docs matching query
+       fromDeState.docsEnum = fromDeState.termsEnum.docs(null, fromDeState.docsEnum);
+       DocsEnum docsEnum = fromDeState.docsEnum;
+
+       if (docsEnum instanceof MultiDocsEnum) {
+         MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum)docsEnum).getSubs();
+         int numSubs = ((MultiDocsEnum)docsEnum).getNumSubs();
+         outer: for (int subindex = 0; subindex<numSubs; subindex++) {
+           MultiDocsEnum.EnumWithSlice sub = subs[subindex];
+           if (sub.docsEnum == null) continue;
+           DocsEnum.BulkReadResult bulk = sub.docsEnum.getBulkResult();
+           // doc ids read from a sub-enum are offset by the start of its slice
+           int base = sub.slice.start;
+           for (;;) {
+             int nDocs = sub.docsEnum.read();
+             if (nDocs == 0) break;
+             int[] docArr = bulk.docs.ints; // this might be movable outside the loop, but perhaps not worth the risk.
+             int end = bulk.docs.offset + nDocs;
+             for (int i=bulk.docs.offset; i<end; i++) {
+               if (fastForRandomSet.exists(docArr[i]+base)) {
+                 intersects = true;
+                 break outer;
+               }
+             }
+           }
+         }
+       } else {
+         // this should be the same bulk result object if sharing of the docsEnum succeeded
+         DocsEnum.BulkReadResult bulk = docsEnum.getBulkResult();
+
+         outer: for (;;) {
+           int nDocs = docsEnum.read();
+           if (nDocs == 0) break;
+           int[] docArr = bulk.docs.ints; // this might be movable outside the loop, but perhaps not worth the risk.
+           int end = bulk.docs.offset + nDocs;
+           for (int i=bulk.docs.offset; i<end; i++) {
+             if (fastForRandomSet.exists(docArr[i])) {
+               intersects = true;
+               break outer;
+             }
+           }
+         }
+       }
+     } else {
+       // use the filter cache
+       DocSet fromTermSet = fromSearcher.getDocSet(fromDeState);
+       intersects = fromSet.intersects(fromTermSet);
+     }
+
+     if (intersects) {
+       fromTermHits++;
+       // sum of the df of the matching terms
+       fromTermHitsTotalDf += freq;
+       TermsEnum.SeekStatus status = toTermsEnum.seek(term);
+       if (status == TermsEnum.SeekStatus.END) break;
+       if (status == TermsEnum.SeekStatus.FOUND) {
+         toTermHits++;
+         int df = toTermsEnum.docFreq();
+         toTermHitsTotalDf += df;
+         if (resultBits==null && df + resultListDocs > maxSortedIntSize && resultList.size() > 0) {
+           resultBits = new OpenBitSet(toSearcher.maxDoc());
+         }
+
+         // if we don't have a bitset yet, or if the resulting set will be too large
+         // use the filterCache to get a DocSet
+         if (df >= minDocFreqTo || resultBits == null) {
+           // use filter cache
+           DocSet toTermSet = toSearcher.getDocSet(toDeState);
+           resultListDocs += toTermSet.size();
+           if (resultBits != null) {
+             toTermSet.setBitsOn(resultBits);
+           } else {
+             if (toTermSet instanceof BitDocSet) {
+               resultBits = (OpenBitSet)((BitDocSet)toTermSet).bits.clone();
+             } else {
+               resultList.add(toTermSet);
+             }
+           }
+         } else {
+           // resultBits is non-null here, otherwise the branch above was taken
+           toTermDirectCount++;
+
+           // need to use deletedDocs here so we don't map to any deleted ones
+           toDeState.docsEnum = toDeState.termsEnum.docs(toDeState.deletedDocs, toDeState.docsEnum);
+           DocsEnum docsEnum = toDeState.docsEnum;
+
+           if (docsEnum instanceof MultiDocsEnum) {
+             MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum)docsEnum).getSubs();
+             int numSubs = ((MultiDocsEnum)docsEnum).getNumSubs();
+             for (int subindex = 0; subindex<numSubs; subindex++) {
+               MultiDocsEnum.EnumWithSlice sub = subs[subindex];
+               if (sub.docsEnum == null) continue;
+               DocsEnum.BulkReadResult bulk = sub.docsEnum.getBulkResult();
+               int base = sub.slice.start;
+               for (;;) {
+                 int nDocs = sub.docsEnum.read();
+                 if (nDocs == 0) break;
+                 resultListDocs += nDocs;
+                 int[] docArr = bulk.docs.ints; // this might be movable outside the loop, but perhaps not worth the risk.
+                 int end = bulk.docs.offset + nDocs;
+                 for (int i=bulk.docs.offset; i<end; i++) {
+                   resultBits.fastSet(docArr[i]+base);
+                 }
+               }
+             }
+           } else {
+             // this should be the same bulk result object if sharing of the docsEnum succeeded
+             DocsEnum.BulkReadResult bulk = docsEnum.getBulkResult();
+
+             for (;;) {
+               int nDocs = docsEnum.read();
+               if (nDocs == 0) break;
+               resultListDocs += nDocs;
+               int[] docArr = bulk.docs.ints; // this might be movable outside the loop, but perhaps not worth the risk.
+               int end = bulk.docs.offset + nDocs;
+               for (int i=bulk.docs.offset; i<end; i++) {
+                 resultBits.fastSet(docArr[i]);
+               }
+             }
+           }
+         }
+
+       }
+     }
+
+     term = termsEnum.next();
+   }
+
+   smallSetsDeferred = resultList.size();
+
+   if (resultBits != null) {
+     // fold the small sets collected before the bitset existed into it
+     for (DocSet set : resultList) {
+       set.setBitsOn(resultBits);
+     }
+     return new BitDocSet(resultBits);
+   }
+
+   if (resultList.size()==0) {
+     return DocSet.EMPTY;
+   }
+
+   if (resultList.size() == 1) {
+     return resultList.get(0);
+   }
+
+   // Merge the small sets: concatenate, sort, then drop duplicates.
+   // The array must hold exactly the collected ids. Starting the size at
+   // resultList.size() left zero-filled padding that sorted to the front
+   // and added a bogus document 0 to the result.
+   int sz = 0;
+
+   for (DocSet set : resultList)
+     sz += set.size();
+
+   int[] docs = new int[sz];
+   int pos = 0;
+   for (DocSet set : resultList) {
+     System.arraycopy(((SortedIntDocSet)set).getDocs(), 0, docs, pos, set.size());
+     pos += set.size();
+   }
+   Arrays.sort(docs);
+   int[] dedup = new int[sz];
+   pos = 0;
+   int last = -1;
+   for (int doc : docs) {
+     if (doc != last)
+       dedup[pos++] = doc;
+     last = doc;
+   }
+
+   if (pos != dedup.length) {
+     dedup = Arrays.copyOf(dedup, pos);
+   }
+
+   return new SortedIntDocSet(dedup, dedup.length);
+ }
+
+ /**
+  * Explains whether {@code doc} is matched by this weight.
+  * A fresh scorer is obtained for {@code context} and advanced to {@code doc};
+  * the document counts as a match only when the scorer lands exactly on it.
+  *
+  * @param context the reader context passed through to {@code scorer}
+  * @param doc the document id to explain
+  * @return a matching explanation carrying {@code queryWeight} with boost and
+  *         queryNorm details, or a non-matching explanation with value 0
+  * @throws IOException if creating or advancing the scorer fails
+  */
+ @Override
+ public Explanation explain(IndexReader.AtomicReaderContext context, int doc) throws IOException {
+   // Resolve the match first, before any explanation object is built.
+   final boolean matched = scorer(context, null).advance(doc) == doc;
+   final ComplexExplanation explanation = new ComplexExplanation();
+
+   if (!matched) {
+     explanation.setDescription(this.toString() + " doesn't match id " + doc);
+     explanation.setValue(0);
+     explanation.setMatch(Boolean.FALSE);
+     return explanation;
+   }
+
+   explanation.setDescription(this.toString() + " , product of:");
+   explanation.setValue(queryWeight);
+   explanation.setMatch(Boolean.TRUE);
+   explanation.addDetail(new Explanation(getBoost(), "boost"));
+   explanation.addDetail(new Explanation(queryNorm, "queryNorm"));
+   return explanation;
+ }
+ }
+
+
+ /**
+  * Scorer that walks a precomputed {@link DocIdSetIterator} and reports the
+  * same score for every document: the value read from the weight at
+  * construction time.
+  */
+ protected static class JoinScorer extends Scorer {
+   final DocIdSetIterator iter;
+   final float score;
+   // Not read anywhere inside this class; position is always taken from iter.
+   int doc = -1;
+
+   /**
+    * @param w the weight supplying the constant score via {@code getValue()}
+    * @param iter the matching documents; {@code null} is treated as an empty set
+    * @throws IOException if the empty iterator cannot be created
+    */
+   public JoinScorer(Weight w, DocIdSetIterator iter) throws IOException {
+     super(w);
+     this.score = w.getValue();
+     if (iter != null) {
+       this.iter = iter;
+     } else {
+       this.iter = DocIdSet.EMPTY_DOCIDSET.iterator();
+     }
+   }
+
+   /** Current position, delegated to the wrapped iterator. */
+   @Override
+   public int docID() {
+     return iter.docID();
+   }
+
+   /** Moves the wrapped iterator to its next document and returns that id. */
+   @Override
+   public int nextDoc() throws IOException {
+     return iter.nextDoc();
+   }
+
+   /** Advances the wrapped iterator to {@code target} and returns where it landed. */
+   @Override
+   public int advance(int target) throws IOException {
+     return iter.advance(target);
+   }
+
+   /** Returns the constant score captured in the constructor. */
+   @Override
+   public float score() throws IOException {
+     return score;
+   }
+ }
+
+
+ /**
+  * Renders this query in local-params form:
+  * {@code {!join from=<fromField> to=<toField>[ fromIndex=<fromIndex>]}<q>}.
+  * The {@code fromIndex} part appears only when {@code fromIndex} is non-null.
+  *
+  * @param field not used by this implementation
+  */
+ @Override
+ public String toString(String field) {
+   StringBuilder out = new StringBuilder("{!join from=");
+   out.append(fromField).append(" to=").append(toField);
+   if (fromIndex != null) {
+     out.append(" fromIndex=").append(fromIndex);
+   }
+   out.append("}").append(q.toString());
+   return out.toString();
+ }
+
+ /**
+  * Two join queries are equal when they have the same runtime class, the same
+  * from/to fields, the same boost, an equal wrapped query and the same
+  * {@code fromIndex} (both null, or equal strings).
+  *
+  * @param o the object to compare with; may be {@code null}
+  * @return {@code true} if {@code o} is an equivalent join query
+  */
+ @Override
+ public boolean equals(Object o) {
+   // Identity shortcut keeps equals reflexive even when the boost is NaN.
+   if (this == o) return true;
+   // equals(null) must return false rather than throw from o.getClass().
+   if (o == null || getClass() != o.getClass()) return false;
+   JoinQuery other = (JoinQuery)o;
+   return this.fromField.equals(other.fromField)
+       && this.toField.equals(other.toField)
+       && this.getBoost() == other.getBoost()
+       && this.q.equals(other.q)
+       && (this.fromIndex == other.fromIndex || this.fromIndex != null && this.fromIndex.equals(other.fromIndex));
+ }
+
+ /**
+  * Hash built from the wrapped query and the from/to field names using the
+  * usual multiply-by-31 scheme. Boost and {@code fromIndex} are compared in
+  * {@code equals} but left out here; equal queries still hash alike.
+  */
+ @Override
+ public int hashCode() {
+   return 31 * (31 * q.hashCode() + fromField.hashCode()) + toField.hashCode();
+ }
+
+}
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/QParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/QParserPlugin.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/QParserPlugin.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/QParserPlugin.java Wed Apr 27 01:42:05 2011
@@ -40,6 +40,7 @@ public abstract class QParserPlugin impl
FunctionRangeQParserPlugin.NAME, FunctionRangeQParserPlugin.class,
SpatialFilterQParserPlugin.NAME, SpatialFilterQParserPlugin.class,
SpatialBoxQParserPlugin.NAME, SpatialBoxQParserPlugin.class,
+ JoinQParserPlugin.NAME, JoinQParserPlugin.class,
};
/** return a {@link QParser} */
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrIndexSearcher.java Wed Apr 27 01:42:05 2011
@@ -28,12 +28,17 @@ import org.apache.lucene.store.FSDirecto
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
+import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.request.UnInvertedField;
+import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.slf4j.Logger;
@@ -189,6 +194,10 @@ public class SolrIndexSearcher extends I
return name;
}
+ /** Returns the {@link SolrCore} held in this searcher's {@code core} field. */
+ public SolrCore getCore() {
+ return core;
+ }
+
/** Register sub-objects such as caches
*/
@@ -576,32 +585,6 @@ public class SolrIndexSearcher extends I
return answer;
}
- /** lucene.internal */
- public DocSet getDocSet(Query query, DocsEnumState deState) throws IOException {
- // Get the absolute value (positive version) of this query. If we
- // get back the same reference, we know it's positive.
- Query absQ = QueryUtils.getAbs(query);
- boolean positive = query==absQ;
-
- if (filterCache != null) {
- DocSet absAnswer = filterCache.get(absQ);
- if (absAnswer!=null) {
- if (positive) return absAnswer;
- else return getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
- }
- }
-
- DocSet absAnswer = getDocSetNC(absQ, null, deState);
- DocSet answer = positive ? absAnswer : getPositiveDocSet(matchAllDocsQuery, deState).andNot(absAnswer);
-
- if (filterCache != null) {
- // cache negative queries as positive
- filterCache.put(absQ, absAnswer);
- }
-
- return answer;
- }
-
// only handle positive (non negative) queries
DocSet getPositiveDocSet(Query q) throws IOException {
DocSet answer;
@@ -614,18 +597,6 @@ public class SolrIndexSearcher extends I
return answer;
}
- // only handle positive (non negative) queries
- DocSet getPositiveDocSet(Query q, DocsEnumState deState) throws IOException {
- DocSet answer;
- if (filterCache != null) {
- answer = filterCache.get(q);
- if (answer!=null) return answer;
- }
- answer = getDocSetNC(q,null,deState);
- if (filterCache != null) filterCache.put(q,answer);
- return answer;
- }
-
private static Query matchAllDocsQuery = new MatchAllDocsQuery();
/**
@@ -756,21 +727,31 @@ public class SolrIndexSearcher extends I
}
- // query must be positive
- protected DocSet getDocSetNC(Query query, DocSet filter, DocsEnumState deState) throws IOException {
- if (filter != null) return getDocSetNC(query, filter, null);
+ /** lucene.internal */
+ public DocSet getDocSet(DocsEnumState deState) throws IOException {
+ int largestPossible = deState.termsEnum.docFreq();
+ boolean useCache = filterCache != null && largestPossible >= deState.minSetSizeCached;
+ TermQuery key = null;
+
+ if (useCache) {
+ key = new TermQuery(new Term(deState.fieldName, new BytesRef(deState.termsEnum.term()), false));
+ DocSet result = filterCache.get(key);
+ if (result != null) return result;
+ }
int smallSetSize = maxDoc()>>6;
- int largestPossible = deState.termsEnum.docFreq();
+ int scratchSize = Math.min(smallSetSize, largestPossible);
+ if (deState.scratch == null || deState.scratch.length < scratchSize)
+ deState.scratch = new int[scratchSize];
- int[] docs = new int[Math.min(smallSetSize, largestPossible)];
+ final int[] docs = deState.scratch;
int upto = 0;
int bitsSet = 0;
OpenBitSet obs = null;
- DocsEnum docsEnum = deState.termsEnum.docs(deState.deletedDocs, deState.reuse);
- if (deState.reuse == null) {
- deState.reuse = docsEnum;
+ DocsEnum docsEnum = deState.termsEnum.docs(deState.deletedDocs, deState.docsEnum);
+ if (deState.docsEnum == null) {
+ deState.docsEnum = docsEnum;
}
if (docsEnum instanceof MultiDocsEnum) {
@@ -822,15 +803,22 @@ public class SolrIndexSearcher extends I
}
}
+ DocSet result;
if (obs != null) {
for (int i=0; i<upto; i++) {
obs.fastSet(docs[i]);
}
bitsSet += upto;
- return new BitDocSet(obs, bitsSet);
+ result = new BitDocSet(obs, bitsSet);
+ } else {
+ result = new SortedIntDocSet(Arrays.copyOf(docs, upto));
}
- return new SortedIntDocSet(docs, upto);
+ if (useCache) {
+ filterCache.put(key, result);
+ }
+
+ return result;
}
// query must be positive
@@ -1640,17 +1628,20 @@ public class SolrIndexSearcher extends I
}
/** @lucene.internal */
- public int numDocs(Query a, DocSet b, DocsEnumState deState) throws IOException {
+ public int numDocs(DocSet a, DocsEnumState deState) throws IOException {
// Negative query if absolute value different from original
- Query absQ = QueryUtils.getAbs(a);
- DocSet positiveA = getPositiveDocSet(absQ, deState);
- return a==absQ ? b.intersectionSize(positiveA) : b.andNotSize(positiveA);
+ return a.intersectionSize(getDocSet(deState));
}
public static class DocsEnumState {
+ public String fieldName; // currently interned for as long as lucene requires it
public TermsEnum termsEnum;
public Bits deletedDocs;
- public DocsEnum reuse;
+ public DocsEnum docsEnum;
+
+ public int minSetSizeCached;
+
+ public int[] scratch;
}
/**
@@ -1706,9 +1697,29 @@ public class SolrIndexSearcher extends I
boolean logme = log.isInfoEnabled();
long warmingStartTime = System.currentTimeMillis();
// warm the caches in order...
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.add("warming","true");
for (int i=0; i<cacheList.length; i++) {
if (logme) log.info("autowarming " + this + " from " + old + "\n\t" + old.cacheList[i]);
- this.cacheList[i].warm(this, old.cacheList[i]);
+
+
+ SolrQueryRequest req = new LocalSolrQueryRequest(core,params) {
+ @Override public SolrIndexSearcher getSearcher() { return SolrIndexSearcher.this; }
+ @Override public void close() { }
+ };
+
+ SolrQueryResponse rsp = new SolrQueryResponse();
+ SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
+ try {
+ this.cacheList[i].warm(this, old.cacheList[i]);
+ } finally {
+ try {
+ req.close();
+ } finally {
+ SolrRequestInfo.clearRequestInfo();
+ }
+ }
+
if (logme) log.info("autowarming result for " + this + "\n\t" + this.cacheList[i]);
}
warmupTime = System.currentTimeMillis() - warmingStartTime;
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/search/SortedIntDocSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/SortedIntDocSet.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/SortedIntDocSet.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/SortedIntDocSet.java Wed Apr 27 01:42:05 2011
@@ -166,6 +166,59 @@ public class SortedIntDocSet extends Doc
return icount;
}
+
+ /**
+  * Tests whether two ascending-sorted int arrays share at least one value.
+  * See intersectionSize for more in-depth comments on this algorithm: for each
+  * element of the smaller list the search window in the bigger list is first
+  * narrowed with up to two forward probes of a fixed step, then resolved with
+  * a binary search. The lower bound is carried over between elements, which
+  * relies on both lists being sorted.
+  *
+  * @param smallerSortedList the shorter sorted list; may be empty
+  * @param biggerSortedList the longer sorted list
+  * @return {@code true} as soon as a common value is found, otherwise {@code false}
+  */
+ public static boolean intersects(int[] smallerSortedList, int[] biggerSortedList) {
+   final int[] a = smallerSortedList;
+   final int[] b = biggerSortedList;
+
+   // An empty list intersects nothing; this also avoids dividing by zero below.
+   if (a.length == 0) return false;
+
+   int step = (b.length/a.length)+1;
+
+   step = step + step;
+
+   int low = 0;
+   int max = b.length-1;
+
+   for (int i=0; i<a.length; i++) {
+     int doca = a[i];
+     int high = max;
+
+     // Probe ahead by "step" (at most twice) to shrink [low, high] cheaply.
+     int probe = low + step;
+     if (probe<high) {
+       if (b[probe]>=doca) {
+         high=probe;
+       } else {
+         low=probe+1;
+         probe = low + step;
+         if (probe<high) {
+           if (b[probe]>=doca) {
+             high=probe;
+           } else {
+             low=probe+1;
+           }
+         }
+       }
+     }
+
+     // Plain binary search inside the narrowed window.
+     while (low <= high) {
+       int mid = (low+high) >>> 1;
+       int docb = b[mid];
+
+       if (docb < doca) {
+         low = mid+1;
+       }
+       else if (docb > doca) {
+         high = mid-1;
+       }
+       else {
+         return true;
+       }
+     }
+   }
+
+   return false;
+ }
+
public int intersectionSize(DocSet other) {
if (!(other instanceof SortedIntDocSet)) {
// assume other implementations are better at random access than we are,
@@ -215,6 +268,49 @@ public class SortedIntDocSet extends Doc
return icount;
}
+ /**
+  * Returns {@code true} if this set and {@code other} have at least one
+  * document in common.
+  *
+  * @param other the set to test against
+  */
+ @Override
+ public boolean intersects(DocSet other) {
+   if (!(other instanceof SortedIntDocSet)) {
+     // Assume other implementations are better at random access than we are
+     // (true of BitDocSet and HashDocSet), so just look up each of our docs.
+     for (int id : docs) {
+       if (other.exists(id)) return true;
+     }
+     return false;
+   }
+
+   // Name the shorter array "a" and the longer one "b".
+   final int[] theirs = ((SortedIntDocSet)other).docs;
+   final int[] a;
+   final int[] b;
+   if (docs.length < theirs.length) {
+     a = docs;
+     b = theirs;
+   } else {
+     a = theirs;
+     b = docs;
+   }
+
+   if (a.length == 0) return false;
+
+   // When b is at least 8 times bigger than a, use the modified binary search.
+   if (a.length <= (b.length >> 3)) {
+     return intersects(a, b);
+   }
+
+   // Sizes are close: walk both sorted arrays in step.
+   int ai = 0;
+   int bi = 0;
+   while (ai < a.length && bi < b.length) {
+     final int doca = a[ai];
+     final int docb = b[bi];
+     // Since set a is less dense than set b, doca is likely to be greater than
+     // docb, so check that case first. This resulted in a 13% speedup.
+     if (doca > docb) {
+       bi++;
+     } else if (doca < docb) {
+       ai++;
+     } else {
+       return true;
+     }
+   }
+   return false;
+ }
/** puts the intersection of a and b into the target array and returns the size */
public static int intersection(int a[], int lena, int b[], int lenb, int[] target) {
@@ -463,6 +559,13 @@ public class SortedIntDocSet extends Doc
return new SortedIntDocSet(arr,sz);
}
+ /**
+  * Sets, in {@code target}, the bit for every document id held by this set.
+  * Uses {@code fastSet}, so {@code target} must already be large enough.
+  */
+ @Override
+ public void setBitsOn(OpenBitSet target) {
+   final int[] ids = docs;
+   for (int i = 0; i < ids.length; i++) {
+     target.fastSet(ids[i]);
+   }
+ }
+
public boolean exists(int doc) {
// this could be faster by estimating where in the list the doc is likely to appear,
@@ -653,4 +756,8 @@ public class SortedIntDocSet extends Doc
};
}
+ /** Returns a new {@code SortedIntDocSet} built from a copy of this set's {@code docs} array. */
+ @Override
+ protected SortedIntDocSet clone() {
+ return new SortedIntDocSet(docs.clone());
+ }
}
Modified: lucene/dev/trunk/solr/src/test-framework/org/apache/solr/SolrTestCaseJ4.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test-framework/org/apache/solr/SolrTestCaseJ4.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test-framework/org/apache/solr/SolrTestCaseJ4.java (original)
+++ lucene/dev/trunk/solr/src/test-framework/org/apache/solr/SolrTestCaseJ4.java Wed Apr 27 01:42:05 2011
@@ -722,6 +722,7 @@ public abstract class SolrTestCaseJ4 ext
}
public static final IRange ZERO_ONE = new IRange(0,1);
+ public static final IRange ZERO_TWO = new IRange(0,2);
public static final IRange ONE_ONE = new IRange(1,1);
public static class Doc implements Comparable{
@@ -1040,6 +1041,29 @@ public abstract class SolrTestCaseJ4 ext
return out.toString();
}
+ /**
+  * Inverts one field of the model: returns a Map from each field value to the
+  * list of ids of the documents that carry that value. Documents for which
+  * {@code getValues(field)} returns null are skipped.
+  */
+ Map<Comparable, List<Comparable>> invertField(Map<Comparable, Doc> model, String field) {
+   Map<Comparable, List<Comparable>> idsByValue = new HashMap<Comparable, List<Comparable>>();
+
+   for (Map.Entry<Comparable, Doc> entry : model.entrySet()) {
+     List<Comparable> values = entry.getValue().getValues(field);
+     if (values == null) continue;
+
+     for (Comparable value : values) {
+       List<Comparable> ids = idsByValue.get(value);
+       if (ids == null) {
+         ids = new ArrayList<Comparable>(2);
+         idsByValue.put(value, ids);
+       }
+       ids.add(entry.getKey());
+     }
+   }
+
+   return idsByValue;
+ }
+
+
/** Gets a resource from the context classloader as {@link File}. This method should only be used,
* if a real file is needed. To get a stream, code should prefer
* {@link Class#getResourceAsStream} using {@code this.getClass()}.
Added: lucene/dev/trunk/solr/src/test/org/apache/solr/TestJoin.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/TestJoin.java?rev=1096978&view=auto
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/TestJoin.java (added)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/TestJoin.java Wed Apr 27 01:42:05 2011
@@ -0,0 +1,216 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr;
+
+import org.apache.lucene.search.FieldCache;
+import org.apache.noggit.JSONUtil;
+import org.apache.noggit.ObjectBuilder;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.JsonUpdateRequestHandler;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.request.SolrRequestHandler;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.servlet.DirectSolrConnection;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.*;
+
+ /**
+  * Tests for the {@code {!join from=... to=...}} query parser: a fixed
+  * people/departments scenario, plus a randomized comparison of join results
+  * against an in-memory model of the index.
+  */
+ public class TestJoin extends SolrTestCaseJ4 {
+
+   @BeforeClass
+   public static void beforeTests() throws Exception {
+     initCore("solrconfig.xml","schema12.xml");
+   }
+
+   /** Joins between "people" docs (dept_s) and "department" docs (dept_id_s). */
+   @Test
+   public void testJoin() throws Exception {
+     assertU(add(doc("id", "1","name", "john", "title", "Director", "dept_s","Engineering")));
+     assertU(add(doc("id", "2","name", "mark", "title", "VP", "dept_s","Marketing")));
+     assertU(add(doc("id", "3","name", "nancy", "title", "MTS", "dept_s","Sales")));
+     assertU(add(doc("id", "4","name", "dave", "title", "MTS", "dept_s","Support", "dept_s","Engineering")));
+     assertU(add(doc("id", "5","name", "tina", "title", "VP", "dept_s","Engineering")));
+
+     assertU(add(doc("id","10", "dept_id_s", "Engineering", "text","These guys develop stuff")));
+     assertU(add(doc("id","11", "dept_id_s", "Marketing", "text","These guys make you look good")));
+     assertU(add(doc("id","12", "dept_id_s", "Sales", "text","These guys sell stuff")));
+     assertU(add(doc("id","13", "dept_id_s", "Support", "text","These guys help customers")));
+
+     assertU(commit());
+
+     // test debugging
+     assertJQ(req("q","{!join from=dept_s to=dept_id_s}title:MTS", "fl","id", "debugQuery","true")
+         ,"/debug/join/{!join from=dept_s to=dept_id_s}title:MTS=={'_MATCH_':'fromSetSize,toSetSize', 'fromSetSize':2, 'toSetSize':3}"
+     );
+
+     assertJQ(req("q","{!join from=dept_s to=dept_id_s}title:MTS", "fl","id")
+         ,"/response=={'numFound':3,'start':0,'docs':[{'id':'10'},{'id':'12'},{'id':'13'}]}"
+     );
+
+     // empty from
+     assertJQ(req("q","{!join from=noexist_s to=dept_id_s}*:*", "fl","id")
+         ,"/response=={'numFound':0,'start':0,'docs':[]}"
+     );
+
+     // empty to
+     assertJQ(req("q","{!join from=dept_s to=noexist_s}*:*", "fl","id")
+         ,"/response=={'numFound':0,'start':0,'docs':[]}"
+     );
+
+     // self join... return everyone with the same title as Dave
+     assertJQ(req("q","{!join from=title to=title}name:dave", "fl","id")
+         ,"/response=={'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'4'}]}"
+     );
+
+     // find people that develop stuff
+     assertJQ(req("q","{!join from=dept_id_s to=dept_s}text:develop", "fl","id")
+         ,"/response=={'numFound':3,'start':0,'docs':[{'id':'1'},{'id':'4'},{'id':'5'}]}"
+     );
+
+     // self join on multivalued text field
+     // NOTE(review): this is the same request as the "same title as Dave" check above.
+     assertJQ(req("q","{!join from=title to=title}name:dave", "fl","id")
+         ,"/response=={'numFound':2,'start':0,'docs':[{'id':'3'},{'id':'4'}]}"
+     );
+
+     // the debugQuery flag must not change the response section
+     assertJQ(req("q","{!join from=dept_s to=dept_id_s}title:MTS", "fl","id", "debugQuery","true")
+         ,"/response=={'numFound':3,'start':0,'docs':[{'id':'10'},{'id':'12'},{'id':'13'}]}"
+     );
+
+   }
+
+
+   /**
+    * Indexes random documents, then for random from/to field pairs compares the
+    * response of a {@code *:*} join with the result computed from the model.
+    */
+   @Test
+   public void testRandomJoin() throws Exception {
+     int indexIter=50 * RANDOM_MULTIPLIER;
+     int queryIter=50 * RANDOM_MULTIPLIER;
+
+     while (--indexIter >= 0) {
+       int indexSize = random.nextInt(20 * RANDOM_MULTIPLIER);
+
+       List<FldType> types = new ArrayList<FldType>();
+       types.add(new FldType("id",ONE_ONE, new SVal('A','Z',4,4)));
+       types.add(new FldType("score_f",ONE_ONE, new FVal(1,100)));  // field used to score
+       types.add(new FldType("small_s",ZERO_ONE, new SVal('a',(char)('c'+indexSize/3),1,1)));
+       types.add(new FldType("small2_s",ZERO_ONE, new SVal('a',(char)('c'+indexSize/3),1,1)));
+       types.add(new FldType("small2_ss",ZERO_TWO, new SVal('a',(char)('c'+indexSize/3),1,1)));
+       types.add(new FldType("small3_ss",new IRange(0,25), new SVal('A','z',1,1)));
+       types.add(new FldType("small_i",ZERO_ONE, new IRange(0,5+indexSize/3)));
+       types.add(new FldType("small2_i",ZERO_ONE, new IRange(0,5+indexSize/3)));
+       types.add(new FldType("small2_is",ZERO_TWO, new IRange(0,5+indexSize/3)));
+       types.add(new FldType("small3_is",new IRange(0,25), new IRange(0,100)));
+
+       clearIndex();
+       Map<Comparable, Doc> model = indexDocs(types, null, indexSize);
+       // join maps already computed for this index, keyed by "fromField/toField"
+       Map<String, Map<Comparable, Set<Comparable>>> pivots = new HashMap<String, Map<Comparable, Set<Comparable>>>();
+
+       for (int qiter=0; qiter<queryIter; qiter++) {
+         String fromField = types.get(random.nextInt(types.size())).fname;
+         String toField = types.get(random.nextInt(types.size())).fname;
+
+         Map<Comparable, Set<Comparable>> pivot = pivots.get(fromField+"/"+toField);
+         if (pivot == null) {
+           pivot = createJoinMap(model, fromField, toField);
+           pivots.put(fromField+"/"+toField, pivot);
+         }
+
+         // the "from" query is *:*, so every document in the model is a source
+         Collection<Doc> fromDocs = model.values();
+         Set<Comparable> docs = join(fromDocs, pivot);
+         List<Doc> docList = new ArrayList<Doc>(docs.size());
+         for (Comparable id : docs) docList.add(model.get(id));
+         Collections.sort(docList, createComparator("_docid_",true,false,false,false));
+
+         // only the first 10 documents are expected in the "docs" list
+         List<Object> sortedDocs = new ArrayList<Object>();
+         for (Doc doc : docList) {
+           if (sortedDocs.size() >= 10) break;
+           sortedDocs.add(doc.toObject(h.getCore().getSchema()));
+         }
+
+         Map<String,Object> resultSet = new LinkedHashMap<String,Object>();
+         resultSet.put("numFound", docList.size());
+         resultSet.put("start", 0);
+         resultSet.put("docs", sortedDocs);
+
+         // todo: use filters
+
+         SolrQueryRequest req = req("wt","json","indent","true", "echoParams","all",
+             "q","{!join from="+fromField+" to="+toField
+                 + (random.nextInt(4)==0 ? " fromIndex=collection1" : "")
+                 +"}*:*"
+         );
+
+         String strResponse = h.query(req);
+
+         Object realResponse = ObjectBuilder.fromJSON(strResponse);
+         String err = JSONTestUtil.matchObj("/response", realResponse, resultSet);
+         if (err != null) {
+           log.error("JOIN MISMATCH: " + err
+               + "\n\trequest="+req
+               + "\n\tresult="+strResponse
+               + "\n\texpected="+ JSONUtil.toJSON(resultSet)
+               + "\n\tmodel="+ JSONUtil.toJSON(model)
+           );
+
+           // re-execute the request... good for putting a breakpoint here for debugging
+           h.query(req);
+
+           fail(err);
+         }
+
+       }
+     }
+   }
+
+
+   /**
+    * Builds the expected join relation for the model: maps each document id to
+    * the ids of all documents whose {@code toField} holds one of that
+    * document's {@code fromField} values. Documents with no match get no entry.
+    */
+   Map<Comparable, Set<Comparable>> createJoinMap(Map<Comparable, Doc> model, String fromField, String toField) {
+     Map<Comparable, Set<Comparable>> id_to_id = new HashMap<Comparable, Set<Comparable>>();
+
+     Map<Comparable, List<Comparable>> value_to_id = invertField(model, toField);
+
+     for (Comparable fromId : model.keySet()) {
+       Doc doc = model.get(fromId);
+       List<Comparable> vals = doc.getValues(fromField);
+       if (vals == null) continue;
+       for (Comparable val : vals) {
+         List<Comparable> toIds = value_to_id.get(val);
+         if (toIds == null) continue;
+         Set<Comparable> ids = id_to_id.get(fromId);
+         if (ids == null) {
+           ids = new HashSet<Comparable>();
+           id_to_id.put(fromId, ids);
+         }
+         ids.addAll(toIds);
+       }
+     }
+
+     return id_to_id;
+   }
+
+
+   /** Returns the union of the join targets of every document in {@code input}. */
+   Set<Comparable> join(Collection<Doc> input, Map<Comparable, Set<Comparable>> joinMap) {
+     Set<Comparable> ids = new HashSet<Comparable>();
+     for (Doc doc : input) {
+       Collection<Comparable> output = joinMap.get(doc.id);
+       if (output == null) continue;
+       ids.addAll(output);
+     }
+     return ids;
+   }
+
+ }
Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/client/solrj/MultiCoreExampleTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/client/solrj/MultiCoreExampleTestBase.java?rev=1096978&r1=1096977&r2=1096978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/client/solrj/MultiCoreExampleTestBase.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/client/solrj/MultiCoreExampleTestBase.java Wed Apr 27 01:42:05 2011
@@ -26,6 +26,7 @@ import org.apache.solr.common.SolrInputD
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrCore;
import org.apache.solr.util.ExternalPaths;
+import org.junit.Test;
/**
@@ -65,8 +66,8 @@ public abstract class MultiCoreExampleTe
protected abstract SolrServer getSolrCore1();
protected abstract SolrServer getSolrAdmin();
protected abstract SolrServer getSolrCore(String name);
-
+ @Test
public void testMultiCore() throws Exception
{
UpdateRequest up = new UpdateRequest();
@@ -79,6 +80,8 @@ public abstract class MultiCoreExampleTe
// Add something to each core
SolrInputDocument doc = new SolrInputDocument();
doc.setField( "id", "AAA" );
+ doc.setField( "name", "AAA1" );
+ doc.setField( "type", "BBB1" );
doc.setField( "core0", "yup" );
// Add to core0
@@ -96,6 +99,8 @@ public abstract class MultiCoreExampleTe
// Add to core1
doc.setField( "id", "BBB" );
+ doc.setField( "name", "BBB1" );
+ doc.setField( "type", "AAA1" );
doc.setField( "core1", "yup" );
doc.removeField( "core0" );
up.add( doc );
@@ -124,6 +129,12 @@ public abstract class MultiCoreExampleTe
assertEquals( 0, getSolrCore1().query( new SolrQuery( "id:AAA" ) ).getResults().size() );
assertEquals( 1, getSolrCore1().query( new SolrQuery( "id:BBB" ) ).getResults().size() );
+ // cross-core join
+ assertEquals( 0, getSolrCore0().query( new SolrQuery( "{!join from=type to=name}*:*" ) ).getResults().size() ); // normal join
+ assertEquals( 1, getSolrCore0().query( new SolrQuery( "{!join from=type to=name fromIndex=core1}id:BBB" ) ).getResults().size() );
+ assertEquals( 1, getSolrCore1().query( new SolrQuery( "{!join from=type to=name fromIndex=core0}id:AAA" ) ).getResults().size() );
+
+
// Now test reloading it should have a newer open time
String name = "core0";
SolrServer coreadmin = getSolrAdmin();