You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by nk...@apache.org on 2016/02/09 21:19:28 UTC
lucene-solr git commit: LUCENE-7019: add two-phase iteration to GeoPointTermQueryConstantScoreWrapper
Repository: lucene-solr
Updated Branches:
refs/heads/master b47eeb2bb -> a928e4b40
LUCENE-7019: add two-phase iteration to GeoPointTermQueryConstantScoreWrapper
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/a928e4b4
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/a928e4b4
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/a928e4b4
Branch: refs/heads/master
Commit: a928e4b40652cad760cf2d596db08370c07dfc2f
Parents: b47eeb2
Author: nknize <nk...@apache.org>
Authored: Tue Feb 9 14:13:56 2016 -0600
Committer: nknize <nk...@apache.org>
Committed: Tue Feb 9 14:19:06 2016 -0600
----------------------------------------------------------------------
lucene/CHANGES.txt | 10 +++
.../GeoPointTermQueryConstantScoreWrapper.java | 88 +++++++++++---------
2 files changed, 60 insertions(+), 38 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a928e4b4/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index b729f77..e86b0ae 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -294,6 +294,16 @@ Other
TestSortingMergePolicy now extend it, TestUpgradeIndexMergePolicy added)
(Christine Poerschke)
+======================= Lucene 5.4.2 =======================
+
+Bug Fixes
+
+* LUCENE-7018: Fix GeoPointTermQueryConstantScoreWrapper to add document on
+ first GeoPointField match. (Nick Knize)
+
+* LUCENE-7019: add two-phase iteration to GeoPointTermQueryConstantScoreWrapper.
+ (Robert Muir via Nick Knize)
+
======================= Lucene 5.4.1 =======================
Bug Fixes
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/a928e4b4/lucene/spatial/src/java/org/apache/lucene/spatial/search/GeoPointTermQueryConstantScoreWrapper.java
----------------------------------------------------------------------
diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/search/GeoPointTermQueryConstantScoreWrapper.java b/lucene/spatial/src/java/org/apache/lucene/spatial/search/GeoPointTermQueryConstantScoreWrapper.java
index 1097add..46a1783 100644
--- a/lucene/spatial/src/java/org/apache/lucene/spatial/search/GeoPointTermQueryConstantScoreWrapper.java
+++ b/lucene/spatial/src/java/org/apache/lucene/spatial/search/GeoPointTermQueryConstantScoreWrapper.java
@@ -23,7 +23,6 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.Terms;
-import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
@@ -31,8 +30,12 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
+import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.DocIdSetBuilder;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.SparseFixedBitSet;
import static org.apache.lucene.spatial.util.GeoEncodingUtils.mortonUnhashLat;
import static org.apache.lucene.spatial.util.GeoEncodingUtils.mortonUnhashLon;
@@ -74,67 +77,76 @@ final class GeoPointTermQueryConstantScoreWrapper <Q extends GeoPointMultiTermQu
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new ConstantScoreWeight(this) {
- private DocIdSet getDocIDs(LeafReaderContext context) throws IOException {
+ @Override
+ public Scorer scorer(LeafReaderContext context) throws IOException {
final Terms terms = context.reader().terms(query.getField());
if (terms == null) {
- return DocIdSet.EMPTY;
+ return null;
}
final GeoPointTermsEnum termsEnum = (GeoPointTermsEnum)(query.getTermsEnum(terms, null));
assert termsEnum != null;
LeafReader reader = context.reader();
+ // approximation (postfiltering has not yet been applied)
DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
+ // subset of documents that need no postfiltering, this is purely an optimization
+ final BitSet preApproved;
+ // dumb heuristic: if the field is really sparse, use a sparse impl
+ if (terms.getDocCount() * 100L < reader.maxDoc()) {
+ preApproved = new SparseFixedBitSet(reader.maxDoc());
+ } else {
+ preApproved = new FixedBitSet(reader.maxDoc());
+ }
PostingsEnum docs = null;
- SortedNumericDocValues sdv = reader.getSortedNumericDocValues(query.getField());
while (termsEnum.next() != null) {
docs = termsEnum.postings(docs, PostingsEnum.NONE);
- // boundary terms need post filtering by
+ // boundary terms need post filtering
if (termsEnum.boundaryTerm()) {
- int docId = docs.nextDoc();
- long hash;
- do {
- sdv.setDocument(docId);
- for (int i=0; i<sdv.count(); ++i) {
- hash = sdv.valueAt(i);
- if (termsEnum.postFilter(mortonUnhashLon(hash), mortonUnhashLat(hash))) {
- builder.add(docId);
- break;
- }
- }
- } while ((docId = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS);
- } else {
builder.add(docs);
+ } else {
+ int docId;
+ while ((docId = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+ builder.add(docId);
+ preApproved.set(docId);
+ }
}
}
- return builder.build();
- }
-
- private Scorer scorer(DocIdSet set) throws IOException {
- if (set == null) {
- return null;
- }
+ DocIdSet set = builder.build();
final DocIdSetIterator disi = set.iterator();
if (disi == null) {
return null;
}
- return new ConstantScoreScorer(this, score(), disi);
- }
- @Override
- public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
- final Scorer scorer = scorer(getDocIDs(context));
- if (scorer == null) {
- return null;
- }
- return new DefaultBulkScorer(scorer);
- }
+ // return two-phase iterator using docvalues to postfilter candidates
+ SortedNumericDocValues sdv = reader.getSortedNumericDocValues(query.getField());
+ TwoPhaseIterator iterator = new TwoPhaseIterator(disi) {
+ @Override
+ public boolean matches() throws IOException {
+ int docId = disi.docID();
+ if (preApproved.get(docId)) {
+ return true;
+ } else {
+ sdv.setDocument(docId);
+ int count = sdv.count();
+ for (int i = 0; i < count; i++) {
+ long hash = sdv.valueAt(i);
+ if (termsEnum.postFilter(mortonUnhashLon(hash), mortonUnhashLat(hash))) {
+ return true;
+ }
+ }
+ return false;
+ }
+ }
- @Override
- public Scorer scorer(LeafReaderContext context) throws IOException {
- return scorer(getDocIDs(context));
+ @Override
+ public float matchCost() {
+ return 20; // TODO: make this fancier
+ }
+ };
+ return new ConstantScoreScorer(this, score(), iterator);
}
};
}