You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2017/07/25 14:50:00 UTC
lucene-solr:branch_7x: SOLR-11093: add Points to GraphQuery
Repository: lucene-solr
Updated Branches:
refs/heads/branch_7x 12ae76b01 -> 9cd3a2bee
SOLR-11093: add Points to GraphQuery
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/9cd3a2be
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/9cd3a2be
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/9cd3a2be
Branch: refs/heads/branch_7x
Commit: 9cd3a2beee2a0374bdc4c3e6224f63989c7cd8c6
Parents: 12ae76b
Author: yonik <yo...@apache.org>
Authored: Tue Jul 25 10:49:24 2017 -0400
Committer: yonik <yo...@apache.org>
Committed: Tue Jul 25 10:49:55 2017 -0400
----------------------------------------------------------------------
solr/CHANGES.txt | 2 +
.../org/apache/solr/search/facet/UniqueAgg.java | 82 +-----
.../org/apache/solr/search/join/GraphQuery.java | 89 ++-----
.../solr/search/join/GraphQueryParser.java | 1 +
.../solr/search/join/GraphTermsCollector.java | 260 ++++++++++++++++---
.../java/org/apache/solr/util/LongIterator.java | 34 +++
.../src/java/org/apache/solr/util/LongSet.java | 135 ++++++++++
.../org/apache/solr/util/hll/BitVector.java | 2 +
.../src/java/org/apache/solr/util/hll/HLL.java | 1 +
.../org/apache/solr/util/hll/LongIterator.java | 34 ---
.../solr/collection1/conf/schema_latest.xml | 17 ++
.../apache/solr/search/join/GraphQueryTest.java | 23 +-
.../org/apache/solr/util/hll/BitVectorTest.java | 1 +
.../org/apache/solr/util/hll/FullHLLTest.java | 1 +
14 files changed, 464 insertions(+), 218 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index f6ed647..651ae6e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -299,6 +299,8 @@ New Features
* SOLR-10282: bin/solr support for enabling Kerberos authentication (Ishan Chattopadhyaya, Jason Gerlowski)
+* SOLR-11093: Add support for PointFields for {!graph} query. (yonik)
+
Bug Fixes
----------------------
* SOLR-9262: Connection and read timeouts are being ignored by UpdateShardHandler after SOLR-4509.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
index dba410a..4541975 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
@@ -29,6 +29,8 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
+import org.apache.solr.util.LongIterator;
+import org.apache.solr.util.LongSet;
public class UniqueAgg extends StrAggValueSource {
public static String UNIQUE = "unique";
@@ -122,75 +124,6 @@ public class UniqueAgg extends StrAggValueSource {
}
-
- static class LongSet {
-
- static final float LOAD_FACTOR = 0.7f;
-
- long[] vals;
- int cardinality;
- int mask;
- int threshold;
- int zeroCount; // 1 if a 0 was collected
-
- /** sz must be a power of two */
- LongSet(int sz) {
- vals = new long[sz];
- mask = sz - 1;
- threshold = (int) (sz * LOAD_FACTOR);
- }
-
- void add(long val) {
- if (val == 0) {
- zeroCount = 1;
- return;
- }
- if (cardinality >= threshold) {
- rehash();
- }
-
- // For floats: exponent bits start at bit 23 for single precision,
- // and bit 52 for double precision.
- // Many values will only have significant bits just to the right of that,
- // and the leftmost bits will all be zero.
-
- // For now, lets just settle to get first 8 significant mantissa bits of double or float in the lowest bits of our hash
- // The upper bits of our hash will be irrelevant.
- int h = (int) (val + (val >>> 44) + (val >>> 15));
- for (int slot = h & mask; ;slot = (slot + 1) & mask) {
- long v = vals[slot];
- if (v == 0) {
- vals[slot] = val;
- cardinality++;
- break;
- } else if (v == val) {
- // val is already in the set
- break;
- }
- }
- }
-
- private void rehash() {
- long[] oldVals = vals;
- int newCapacity = vals.length << 1;
- vals = new long[newCapacity];
- mask = newCapacity - 1;
- threshold = (int) (newCapacity * LOAD_FACTOR);
- cardinality = 0;
-
- for (long val : oldVals) {
- if (val != 0) {
- add(val);
- }
- }
- }
-
- int cardinality() {
- return cardinality + zeroCount;
- }
- }
-
-
static abstract class BaseNumericAcc extends SlotAcc {
SchemaField sf;
LongSet[] sets;
@@ -259,16 +192,11 @@ public class UniqueAgg extends StrAggValueSource {
if (unique <= maxExplicit) {
List lst = new ArrayList( Math.min(unique, maxExplicit) );
if (set != null) {
- if (set.zeroCount > 0) {
- lst.add(0);
- }
- for (long val : set.vals) {
- if (val != 0) {
- lst.add(val);
- }
+ LongIterator iter = set.iterator();
+ while (iter.hasNext()) {
+ lst.add( iter.next() );
}
}
-
map.add("vals", lst);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java b/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java
index db41651..7e52f0c 100644
--- a/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java
+++ b/solr/core/src/java/org/apache/solr/search/join/GraphQuery.java
@@ -25,16 +25,15 @@ import java.util.TreeSet;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
@@ -42,6 +41,7 @@ import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
+import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.Filter;
@@ -130,17 +130,22 @@ public class GraphQuery extends Query {
protected class GraphQueryWeight extends Weight {
final SolrIndexSearcher fromSearcher;
- private final float boost;
- private int frontierSize = 0;
private int currentDepth = -1;
private Filter filter;
private DocSet resultSet;
-
+ SchemaField fromSchemaField;
+ SchemaField toSchemaField;
+
public GraphQueryWeight(SolrIndexSearcher searcher, float boost) {
// Grab the searcher so we can run additional searches.
super(null);
this.fromSearcher = searcher;
- this.boost = boost;
+ this.fromSchemaField = searcher.getSchema().getField(fromField);
+ this.toSchemaField = searcher.getSchema().getField(toField);
+ }
+
+ GraphQuery getGraphQuery() {
+ return GraphQuery.this;
}
@Override
@@ -175,7 +180,7 @@ public class GraphQuery extends Query {
// the initial query for the frontier for the first query
Query frontierQuery = q;
// Find all documents in this graph that are leaf nodes to speed traversal
- DocSet leafNodes = resolveLeafNodes(toField);
+ DocSet leafNodes = resolveLeafNodes();
// Start the breadth first graph traversal.
do {
@@ -187,25 +192,17 @@ public class GraphQuery extends Query {
// if we've reached the max depth, don't worry about collecting edges.
fromSet = fromSearcher.getDocSetBits(frontierQuery);
// explicitly the frontier size is zero now so we can break
- frontierSize = 0;
+ frontierQuery = null;
} else {
// when we're not at the max depth level, we need to collect edges
// Create the graph result collector for this level
- GraphTermsCollector graphResultCollector = new GraphTermsCollector(toField,capacity, resultBits, leafNodes);
+ GraphEdgeCollector graphResultCollector = toSchemaField.getType().isPointField()
+ ? new GraphPointsCollector(this, capacity, resultBits, leafNodes)
+ : new GraphTermsCollector(this, capacity, resultBits, leafNodes);
+
fromSearcher.search(frontierQuery, graphResultCollector);
fromSet = graphResultCollector.getDocSet();
- // All edge ids on the frontier.
- BytesRefHash collectorTerms = graphResultCollector.getCollectorTerms();
- frontierSize = collectorTerms.size();
- // The resulting doc set from the frontier.
- FrontierQuery fq = buildFrontierQuery(collectorTerms, frontierSize);
- if (fq == null) {
- // in case we get null back, make sure we know we're done at this level.
- frontierSize = 0;
- } else {
- frontierQuery = fq.getQuery();
- frontierSize = fq.getFrontierSize();
- }
+ frontierQuery = graphResultCollector.getFrontierQuery();
}
if (currentDepth == 0 && !returnRoot) {
// grab a copy of the root bits but only if we need it.
@@ -217,7 +214,7 @@ public class GraphQuery extends Query {
if ((maxDepth != -1 && currentDepth >= maxDepth)) {
break;
}
- } while (frontierSize > 0);
+ } while (frontierQuery != null);
// helper bit set operations on the final result set
if (!returnRoot) {
resultBits.andNot(rootBits);
@@ -232,9 +229,10 @@ public class GraphQuery extends Query {
}
}
- private DocSet resolveLeafNodes(String field) throws IOException {
+ private DocSet resolveLeafNodes() throws IOException {
+ String field = toSchemaField.getName();
BooleanQuery.Builder leafNodeQuery = new BooleanQuery.Builder();
- WildcardQuery edgeQuery = new WildcardQuery(new Term(field, "*"));
+ Query edgeQuery = toSchemaField.hasDocValues() ? new DocValuesFieldExistsQuery(field) : new WildcardQuery(new Term(field, "*"));
leafNodeQuery.add(edgeQuery, Occur.MUST_NOT);
DocSet leafNodes = fromSearcher.getDocSet(leafNodeQuery.build());
return leafNodes;
@@ -252,50 +250,7 @@ public class GraphQuery extends Query {
final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
return a;
}
-
- /**
- * This return a query that represents the documents that match the next hop in the query.
- *
- * collectorTerms - the terms that represent the edge ids for the current frontier.
- * frontierSize - the size of the frontier query (number of unique edges)
- *
- */
- public FrontierQuery buildFrontierQuery(BytesRefHash collectorTerms, Integer frontierSize) {
- if (collectorTerms == null || collectorTerms.size() == 0) {
- // return null if there are no terms (edges) to traverse.
- return null;
- } else {
- // Create a query
- Query q = null;
- // TODO: see if we should dynamically select this based on the frontier size.
- if (useAutn) {
- // build an automaton based query for the frontier.
- Automaton autn = buildAutomaton(collectorTerms);
- AutomatonQuery autnQuery = new AutomatonQuery(new Term(fromField), autn);
- q = autnQuery;
- } else {
- List<BytesRef> termList = new ArrayList<>(collectorTerms.size());
- for (int i = 0 ; i < collectorTerms.size(); i++) {
- BytesRef ref = new BytesRef();
- collectorTerms.get(i, ref);
- termList.add(ref);
- }
- q = new TermInSetQuery(fromField, termList);
- }
-
- // If there is a filter to be used while crawling the graph, add that.
- if (traversalFilter != null) {
- BooleanQuery.Builder builder = new BooleanQuery.Builder();
- builder.add(q, Occur.MUST);
- builder.add(traversalFilter, Occur.MUST);
- q = builder.build();
- }
- // return the new query.
- FrontierQuery frontier = new FrontierQuery(q, frontierSize);
- return frontier;
- }
- }
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/java/org/apache/solr/search/join/GraphQueryParser.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphQueryParser.java b/solr/core/src/java/org/apache/solr/search/join/GraphQueryParser.java
index 0ef9e6c..9c9b852 100644
--- a/solr/core/src/java/org/apache/solr/search/join/GraphQueryParser.java
+++ b/solr/core/src/java/org/apache/solr/search/join/GraphQueryParser.java
@@ -41,6 +41,7 @@ public class GraphQueryParser extends QParser {
String traversalFilterS = localParams.get("traversalFilter");
Query traversalFilter = traversalFilterS == null ? null : subQuery(traversalFilterS, null).getQuery();
+ // NOTE: the from/to are reversed from {!join}
String fromField = localParams.get("from", "node_id");
String toField = localParams.get("to", "edge_ids");
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java b/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
index 377f71b..f32c83b 100644
--- a/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/join/GraphTermsCollector.java
@@ -17,19 +17,40 @@
package org.apache.solr.search.join;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.TreeSet;
+import org.apache.lucene.document.DoublePoint;
+import org.apache.lucene.document.FloatPoint;
+import org.apache.lucene.document.IntPoint;
+import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.AutomatonQuery;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Query;
import org.apache.lucene.search.SimpleCollector;
+import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.DaciukMihovAutomatonBuilder;
+import org.apache.solr.schema.NumberType;
+import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
+import org.apache.solr.util.LongIterator;
+import org.apache.solr.util.LongSet;
/**
* A graph hit collector. This accumulates the edges for a given graph traversal.
@@ -37,17 +58,14 @@ import org.apache.solr.search.DocSet;
* already traversed.
* @lucene.internal
*/
-class GraphTermsCollector extends SimpleCollector implements Collector {
-
- // the field to collect edge ids from
- private String field;
- // all the collected terms
- private BytesRefHash collectorTerms;
- private SortedSetDocValues docTermOrds;
+abstract class GraphEdgeCollector extends SimpleCollector implements Collector {
+
+ GraphQuery.GraphQueryWeight weight;
+
// the result set that is being collected.
- private Bits currentResult;
+ Bits currentResult;
// known leaf nodes
- private DocSet leafNodes;
+ DocSet leafNodes;
// number of hits discovered at this level.
int numHits=0;
BitSet bits;
@@ -56,11 +74,10 @@ class GraphTermsCollector extends SimpleCollector implements Collector {
int baseInParent;
// if we care to track this.
boolean hasCycles = false;
-
- GraphTermsCollector(String field,int maxDoc, Bits currentResult, DocSet leafNodes) {
- this.field = field;
+
+ GraphEdgeCollector(GraphQuery.GraphQueryWeight weight, int maxDoc, Bits currentResult, DocSet leafNodes) {
+ this.weight = weight;
this.maxDoc = maxDoc;
- this.collectorTerms = new BytesRefHash();
this.currentResult = currentResult;
this.leafNodes = leafNodes;
if (bits==null) {
@@ -80,29 +97,14 @@ class GraphTermsCollector extends SimpleCollector implements Collector {
// collect the docs
addDocToResult(doc);
// Optimization to not look up edges for a document that is a leaf node
- if (!leafNodes.exists(doc)) {
+ if (leafNodes == null || !leafNodes.exists(doc)) {
addEdgeIdsToResult(doc-base);
}
// Note: tracking links in for each result would be a huge memory hog... so not implementing at this time.
}
- private void addEdgeIdsToResult(int doc) throws IOException {
- // set the doc to pull the edges ids for.
- if (doc > docTermOrds.docID()) {
- docTermOrds.advance(doc);
- }
- if (doc == docTermOrds.docID()) {
- BytesRef edgeValue = new BytesRef();
- long ord;
- while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
- // TODO: handle non string type fields.
- edgeValue = docTermOrds.lookupOrd(ord);
- // add the edge id to the collector terms.
- collectorTerms.add(edgeValue);
- }
- }
- }
+ abstract void addEdgeIdsToResult(int doc) throws IOException;
private void addDocToResult(int docWithBase) {
// this document is part of the traversal. mark it in our bitmap.
@@ -121,14 +123,25 @@ class GraphTermsCollector extends SimpleCollector implements Collector {
@Override
public void doSetNextReader(LeafReaderContext context) throws IOException {
- // Grab the updated doc values.
- docTermOrds = DocValues.getSortedSet(context.reader(), field);
base = context.docBase;
baseInParent = context.docBaseInParent;
}
-
- public BytesRefHash getCollectorTerms() {
- return collectorTerms;
+
+ protected abstract Query getResultQuery();
+
+ public Query getFrontierQuery() {
+ Query q = getResultQuery();
+ if (q == null) return null;
+
+ // If there is a filter to be used while crawling the graph, add that.
+ if (weight.getGraphQuery().getTraversalFilter() != null) {
+ BooleanQuery.Builder builder = new BooleanQuery.Builder();
+ builder.add(q, BooleanClause.Occur.MUST);
+ builder.add(weight.getGraphQuery().getTraversalFilter(), BooleanClause.Occur.MUST);
+ q = builder.build();
+ }
+
+ return q;
}
@Override
@@ -137,3 +150,180 @@ class GraphTermsCollector extends SimpleCollector implements Collector {
}
}
+
+class GraphTermsCollector extends GraphEdgeCollector {
+ // all the collected terms
+ private BytesRefHash collectorTerms;
+ private SortedSetDocValues docTermOrds;
+
+
+ GraphTermsCollector(GraphQuery.GraphQueryWeight weight, int maxDoc, Bits currentResult, DocSet leafNodes) {
+ super(weight, maxDoc, currentResult, leafNodes);
+ this.collectorTerms = new BytesRefHash();
+ }
+
+ @Override
+ public void doSetNextReader(LeafReaderContext context) throws IOException {
+ super.doSetNextReader(context);
+ // Grab the updated doc values.
+ docTermOrds = DocValues.getSortedSet(context.reader(), weight.getGraphQuery().getToField());
+ }
+
+ @Override
+ void addEdgeIdsToResult(int doc) throws IOException {
+ // set the doc to pull the edges ids for.
+ if (doc > docTermOrds.docID()) {
+ docTermOrds.advance(doc);
+ }
+ if (doc == docTermOrds.docID()) {
+ BytesRef edgeValue = new BytesRef();
+ long ord;
+ while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+ edgeValue = docTermOrds.lookupOrd(ord);
+ // add the edge id to the collector terms.
+ collectorTerms.add(edgeValue);
+ }
+ }
+ }
+
+ @Override
+ protected Query getResultQuery() {
+ if (collectorTerms == null || collectorTerms.size() == 0) {
+ // return null if there are no terms (edges) to traverse.
+ return null;
+ } else {
+ // Create a query
+ Query q = null;
+
+ GraphQuery gq = weight.getGraphQuery();
+ // TODO: see if we should dynamically select this based on the frontier size.
+ if (gq.isUseAutn()) {
+ // build an automaton based query for the frontier.
+ Automaton autn = buildAutomaton(collectorTerms);
+ AutomatonQuery autnQuery = new AutomatonQuery(new Term(gq.getFromField()), autn);
+ q = autnQuery;
+ } else {
+ List<BytesRef> termList = new ArrayList<>(collectorTerms.size());
+ for (int i = 0 ; i < collectorTerms.size(); i++) {
+ BytesRef ref = new BytesRef();
+ collectorTerms.get(i, ref);
+ termList.add(ref);
+ }
+ q = new TermInSetQuery(gq.getFromField(), termList);
+ }
+
+ return q;
+ }
+ }
+
+
+ /** Build an automaton to represent the frontier query */
+ private Automaton buildAutomaton(BytesRefHash termBytesHash) {
+ // need top pass a sorted set of terms to the autn builder (maybe a better way to avoid this?)
+ final TreeSet<BytesRef> terms = new TreeSet<BytesRef>();
+ for (int i = 0 ; i < termBytesHash.size(); i++) {
+ BytesRef ref = new BytesRef();
+ termBytesHash.get(i, ref);
+ terms.add(ref);
+ }
+ final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
+ return a;
+ }
+}
+
+
+class GraphPointsCollector extends GraphEdgeCollector {
+ final LongSet set = new LongSet(256);
+
+ SortedNumericDocValues values = null;
+
+ GraphPointsCollector(GraphQuery.GraphQueryWeight weight, int maxDoc, Bits currentResult, DocSet leafNodes) {
+ super(weight, maxDoc, currentResult, leafNodes);
+ }
+
+ @Override
+ public void doSetNextReader(LeafReaderContext context) throws IOException {
+ super.doSetNextReader(context);
+ values = DocValues.getSortedNumeric(context.reader(), weight.getGraphQuery().getToField());
+ }
+
+ @Override
+ void addEdgeIdsToResult(int doc) throws IOException {
+ // set the doc to pull the edges ids for.
+ int valuesDoc = values.docID();
+ if (valuesDoc < doc) {
+ valuesDoc = values.advance(doc);
+ }
+ if (valuesDoc == doc) {
+ int count = values.docValueCount();
+ for (int i = 0; i < count; i++) {
+ long v = values.nextValue();
+ set.add(v);
+ }
+ }
+ }
+
+ @Override
+ protected Query getResultQuery() {
+ if (set.cardinality() == 0) return null;
+
+ Query q = null;
+ SchemaField sfield = weight.fromSchemaField;
+ NumberType ntype = sfield.getType().getNumberType();
+ boolean multiValued = sfield.multiValued();
+
+ if (ntype == NumberType.LONG || ntype == NumberType.DATE) {
+ long[] vals = new long[set.cardinality()];
+ int i = 0;
+ for (LongIterator iter = set.iterator(); iter.hasNext(); ) {
+ long bits = iter.next();
+ long v = bits;
+ vals[i++] = v;
+ }
+ q = LongPoint.newSetQuery(sfield.getName(), vals);
+ } else if (ntype == NumberType.INTEGER) {
+ int[] vals = new int[set.cardinality()];
+ int i = 0;
+ for (LongIterator iter = set.iterator(); iter.hasNext(); ) {
+ long bits = iter.next();
+ int v = (int)bits;
+ vals[i++] = v;
+ }
+ q = IntPoint.newSetQuery(sfield.getName(), vals);
+ } else if (ntype == NumberType.DOUBLE) {
+ double[] vals = new double[set.cardinality()];
+ int i = 0;
+ for (LongIterator iter = set.iterator(); iter.hasNext(); ) {
+ long bits = iter.next();
+ double v = multiValued ? NumericUtils.sortableLongToDouble(bits) : Double.longBitsToDouble(bits);
+ vals[i++] = v;
+ }
+ q = DoublePoint.newSetQuery(sfield.getName(), vals);
+ } else if (ntype == NumberType.FLOAT) {
+ float[] vals = new float[set.cardinality()];
+ int i = 0;
+ for (LongIterator iter = set.iterator(); iter.hasNext(); ) {
+ long bits = iter.next();
+ float v = multiValued ? NumericUtils.sortableIntToFloat((int) bits) : Float.intBitsToFloat((int) bits);
+ vals[i++] = v;
+ }
+ q = FloatPoint.newSetQuery(sfield.getName(), vals);
+ }
+
+ return q;
+ }
+
+
+ /** Build an automaton to represent the frontier query */
+ private Automaton buildAutomaton(BytesRefHash termBytesHash) {
+ // need top pass a sorted set of terms to the autn builder (maybe a better way to avoid this?)
+ final TreeSet<BytesRef> terms = new TreeSet<BytesRef>();
+ for (int i = 0 ; i < termBytesHash.size(); i++) {
+ BytesRef ref = new BytesRef();
+ termBytesHash.get(i, ref);
+ terms.add(ref);
+ }
+ final Automaton a = DaciukMihovAutomatonBuilder.build(terms);
+ return a;
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/java/org/apache/solr/util/LongIterator.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/util/LongIterator.java b/solr/core/src/java/org/apache/solr/util/LongIterator.java
new file mode 100644
index 0000000..654c9a5
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/util/LongIterator.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.util;
+
+/**
+ * A <code>long</code>-based iterator. This is not <i>is-a</i> {@link java.util.Iterator}
+ * to prevent autoboxing between <code>Long</code> and <code>long</code>.
+ */
+public interface LongIterator {
+ /**
+ * @return <code>true</code> if and only if there are more elements to
+ * iterate over. <code>false</code> otherwise.
+ */
+ boolean hasNext();
+
+ /**
+ * @return the next <code>long</code> in the collection. Only valid after hasNext() has been called and returns true.
+ */
+ long next();
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/java/org/apache/solr/util/LongSet.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/util/LongSet.java b/solr/core/src/java/org/apache/solr/util/LongSet.java
new file mode 100644
index 0000000..e649e04
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/util/LongSet.java
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.util;
+
+
+/** Collects long values in a hash set (closed hashing on power-of-two sized long[])
+ * @lucene.internal
+ */
+public class LongSet {
+
+ private static final float LOAD_FACTOR = 0.7f;
+
+ private long[] vals;
+ private int cardinality;
+ private int mask;
+ private int threshold;
+ private int zeroCount; // 1 if a 0 was collected
+
+ public LongSet(int sz) {
+ sz = Math.max(org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo(sz), 2);
+ vals = new long[sz];
+ mask = sz - 1;
+ threshold = (int) (sz * LOAD_FACTOR);
+ }
+
+ /** Returns the long[] array that has entries filled in with values or "0" for empty.
+ * To see if "0" itself is in the set, call containsZero()
+ */
+ public long[] getBackingArray() {
+ return vals;
+ }
+
+ public boolean containsZero() {
+ return zeroCount != 0;
+ }
+
+ /** Adds an additional value to the set */
+ public void add(long val) {
+ if (val == 0) {
+ zeroCount = 1;
+ return;
+ }
+ if (cardinality >= threshold) {
+ rehash();
+ }
+
+ // For floats: exponent bits start at bit 23 for single precision,
+ // and bit 52 for double precision.
+ // Many values will only have significant bits just to the right of that.
+
+ // For now, lets just settle to get first 8 significant mantissa bits of double or float in the lowest bits of our hash
+ // The upper bits of our hash will be irrelevant.
+ int h = (int) (val + (val >>> 44) + (val >>> 15));
+ for (int slot = h & mask; ; slot = (slot + 1) & mask) {
+ long v = vals[slot];
+ if (v == 0) {
+ vals[slot] = val;
+ cardinality++;
+ break;
+ } else if (v == val) {
+ // val is already in the set
+ break;
+ }
+ }
+ }
+
+ private void rehash() {
+ long[] oldVals = vals;
+ int newCapacity = vals.length << 1;
+ vals = new long[newCapacity];
+ mask = newCapacity - 1;
+ threshold = (int) (newCapacity * LOAD_FACTOR);
+ cardinality = 0;
+
+ for (long val : oldVals) {
+ if (val != 0) {
+ add(val);
+ }
+ }
+ }
+
+ /** The number of values in the set */
+ public int cardinality() {
+ return cardinality + zeroCount;
+ }
+
+
+ /** Returns an iterator over the values in the set.
+ * hasNext() must return true for next() to return a valid value.
+ */
+ public LongIterator iterator() {
+ return new LongIterator() {
+ private boolean hasNext = zeroCount > 0;
+ private int i = -1;
+ private long value = 0;
+
+ @Override
+ public boolean hasNext() {
+ if (hasNext) {
+ // this is only executed the first time for the special case 0 value
+ return true;
+ }
+ while (++i < vals.length) {
+ value = vals[i];
+ if (value != 0) {
+ return hasNext = true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public long next() {
+ hasNext = false;
+ return value;
+ }
+
+ };
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/java/org/apache/solr/util/hll/BitVector.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/util/hll/BitVector.java b/solr/core/src/java/org/apache/solr/util/hll/BitVector.java
index 2545e43..6c62b1e 100644
--- a/solr/core/src/java/org/apache/solr/util/hll/BitVector.java
+++ b/solr/core/src/java/org/apache/solr/util/hll/BitVector.java
@@ -16,6 +16,8 @@
*/
package org.apache.solr.util.hll;
+import org.apache.solr.util.LongIterator;
+
/**
* A vector (array) of bits that is accessed in units ("registers") of <code>width</code>
* bits which are stored as 64bit "words" (<code>long</code>s). In this context
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/java/org/apache/solr/util/hll/HLL.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/util/hll/HLL.java b/solr/core/src/java/org/apache/solr/util/hll/HLL.java
index 6bcaee4..26bfa89 100644
--- a/solr/core/src/java/org/apache/solr/util/hll/HLL.java
+++ b/solr/core/src/java/org/apache/solr/util/hll/HLL.java
@@ -22,6 +22,7 @@ import com.carrotsearch.hppc.IntByteHashMap;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.cursors.IntByteCursor;
import com.carrotsearch.hppc.cursors.LongCursor;
+import org.apache.solr.util.LongIterator;
/**
* A probabilistic set of hashed <code>long</code> elements. Useful for computing
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java b/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java
deleted file mode 100644
index a584ccc..0000000
--- a/solr/core/src/java/org/apache/solr/util/hll/LongIterator.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.util.hll;
-
-/**
- * A <code>long</code>-based iterator. This is not <i>is-a</i> {@link java.util.Iterator}
- * to prevent autoboxing between <code>Long</code> and <code>long</code>.
- */
-interface LongIterator {
- /**
- * @return <code>true</code> if and only if there are more elements to
- * iterate over. <code>false</code> otherwise.
- */
- boolean hasNext();
-
- /**
- * @return the next <code>long</code> in the collection.
- */
- long next();
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml b/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml
index 127b291..1135d20 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema_latest.xml
@@ -240,6 +240,16 @@
<dynamicField name="*_dtdS" type="date" indexed="true" stored="true" docValues="true"/>
<dynamicField name="*_dtdsS" type="date" indexed="true" stored="true" multiValued="true" docValues="true"/>
+ <!-- explicit points with docValues (since they can't be uninverted with FieldCache -->
+ <dynamicField name="*_ip" type="pint" indexed="true" stored="true" docValues="true" multiValued="false"/>
+ <dynamicField name="*_ips" type="pint" indexed="true" stored="true" docValues="true" multiValued="true"/>
+ <dynamicField name="*_lp" type="plong" indexed="true" stored="true" docValues="true" multiValued="false"/>
+ <dynamicField name="*_lps" type="plong" indexed="true" stored="true" docValues="true" multiValued="true"/>
+ <dynamicField name="*_fp" type="pfloat" indexed="true" stored="true" docValues="true" multiValued="false"/>
+ <dynamicField name="*_fps" type="pfloat" indexed="true" stored="true" docValues="true" multiValued="true"/>
+ <dynamicField name="*_dp" type="pdouble" indexed="true" stored="true" docValues="true" multiValued="false"/>
+ <dynamicField name="*_dps" type="pdouble" indexed="true" stored="true" docValues="true" multiValued="true"/>
+
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_bs" type="boolean" indexed="true" stored="true" multiValued="true"/>
@@ -354,6 +364,13 @@
field first in an ascending sort and last in a descending sort.
-->
+ <!-- Point Fields -->
+ <fieldType name="pint" class="solr.IntPointField" docValues="true"/>
+ <fieldType name="plong" class="solr.LongPointField" docValues="true"/>
+ <fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>
+ <fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
+ <fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
+
<!--
Default numeric field types. For faster range queries, consider the tint/tfloat/tlong/tdouble types.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java b/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
index b8f8fc8..22e532c 100644
--- a/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
+++ b/solr/core/src/test/org/apache/solr/search/join/GraphQueryTest.java
@@ -31,17 +31,23 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
@Test
public void testGraph() throws Exception {
+ // normal strings
doGraph( params("node_id","node_s", "edge_id","edge_ss") );
- // TODO: try with numeric fields... doGraph( params("node_id","node_i", "edge_id","edge_is") );
+
+ // point based fields with docvalues
+ doGraph( params("node_id","node_ip", "edge_id","edge_ips") );
+ doGraph( params("node_id","node_lp", "edge_id","edge_lps") );
+ doGraph( params("node_id","node_fp", "edge_id","edge_fps") );
+ doGraph( params("node_id","node_dp", "edge_id","edge_dps") );
}
public void doGraph(SolrParams p) throws Exception {
String node_id = p.get("node_id");
String edge_id = p.get("edge_id");
-
- // 1 -> 2 -> 3 -> ( 4 5 )
- // 7 -> 1
- // 8 -> ( 1 2 )
+
+ // NOTE: from/to fields are reversed from {!join}... values are looked up in the "toField" and then matched on the "fromField"
+ // 1->2->(3,9)->(4,5)->7
+ // 8->(1,2)->...
assertU(adoc("id", "doc_1", node_id, "1", edge_id, "2", "text", "foo", "title", "foo10" ));
assertU(adoc("id", "doc_2", node_id, "2", edge_id, "3", "text", "foo" ));
assertU(commit());
@@ -71,6 +77,13 @@ public class GraphQueryTest extends SolrTestCaseJ4 {
assertJQ(req(p, "q","{!graph from=${node_id} to=${edge_id}}id:doc_1")
, "/response/numFound==7"
);
+
+ // reverse the order to test single/multi-valued on the opposite fields
+ // start with doc1, look up node_id (1) and match to edge_id (docs 7 and 8)
+ assertJQ(req(p, "q","{!graph from=${edge_id} to=${node_id} maxDepth=1}id:doc_1")
+ , "/response/numFound==3"
+ );
+
assertJQ(req(p, "q","{!graph from=${node_id} to=${edge_id} returnRoot=true returnOnlyLeaf=false}id:doc_8")
, "/response/numFound==8"
);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/test/org/apache/solr/util/hll/BitVectorTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/util/hll/BitVectorTest.java b/solr/core/src/test/org/apache/solr/util/hll/BitVectorTest.java
index 0107af1..c84d817 100644
--- a/solr/core/src/test/org/apache/solr/util/hll/BitVectorTest.java
+++ b/solr/core/src/test/org/apache/solr/util/hll/BitVectorTest.java
@@ -19,6 +19,7 @@ package org.apache.solr.util.hll;
import java.util.Locale;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.util.LongIterator;
import org.junit.Test;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9cd3a2be/solr/core/src/test/org/apache/solr/util/hll/FullHLLTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/util/hll/FullHLLTest.java b/solr/core/src/test/org/apache/solr/util/hll/FullHLLTest.java
index aef4838..6be2656 100644
--- a/solr/core/src/test/org/apache/solr/util/hll/FullHLLTest.java
+++ b/solr/core/src/test/org/apache/solr/util/hll/FullHLLTest.java
@@ -17,6 +17,7 @@
package org.apache.solr.util.hll;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.solr.util.LongIterator;
import org.junit.Test;
/**