You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ro...@apache.org on 2015/09/14 10:52:38 UTC
svn commit: r1702872 - in /lucene/dev/trunk: lucene/
lucene/core/src/java/org/apache/lucene/search/payloads/
lucene/core/src/test/org/apache/lucene/search/payloads/
lucene/highlighter/src/test/org/apache/lucene/search/highlight/
solr/core/src/test/org/...
Author: romseygeek
Date: Mon Sep 14 08:52:38 2015
New Revision: 1702872
URL: http://svn.apache.org/r1702872
Log:
LUCENE-6716: Change SpanPayloadCheckQuery to take List<BytesRef>
Added:
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadCheckQuery.java (with props)
Removed:
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadBasics.java
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java
lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1702872&r1=1702871&r2=1702872&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Sep 14 08:52:38 2015
@@ -82,6 +82,9 @@ API Changes
In order to apply boosts, you now need to wrap queries in a BoostQuery.
(Adrien Grand)
+* LUCENE-6716: SpanPayloadCheckQuery now takes a List<BytesRef> rather than
+ a Collection<byte[]>. (Alan Woodward)
+
Optimizations
* LUCENE-6708: TopFieldCollector does not compute the score several times on the
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java?rev=1702872&r1=1702871&r2=1702872&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/search/payloads/SpanPayloadCheckQuery.java Mon Sep 14 08:52:38 2015
@@ -17,13 +17,12 @@ package org.apache.lucene.search.payload
*/
import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Iterator;
+import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
@@ -32,10 +31,12 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.FilterSpans;
import org.apache.lucene.search.spans.FilterSpans.AcceptStatus;
+import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ToStringUtils;
/**
@@ -43,14 +44,14 @@ import org.apache.lucene.util.ToStringUt
*/
public class SpanPayloadCheckQuery extends SpanQuery {
- protected final Collection<byte[]> payloadToMatch;
+ protected final List<BytesRef> payloadToMatch;
protected final SpanQuery match;
/**
* @param match The underlying {@link org.apache.lucene.search.spans.SpanQuery} to check
- * @param payloadToMatch The {@link java.util.Collection} of payloads to match
+ * @param payloadToMatch The {@link java.util.List} of payloads to match
*/
- public SpanPayloadCheckQuery(SpanQuery match, Collection<byte[]> payloadToMatch) {
+ public SpanPayloadCheckQuery(SpanQuery match, List<BytesRef> payloadToMatch) {
this.match = match;
this.payloadToMatch = payloadToMatch;
}
@@ -90,14 +91,14 @@ public class SpanPayloadCheckQuery exten
@Override
public Spans getSpans(final LeafReaderContext context, Postings requiredPostings) throws IOException {
- final PayloadSpanCollector collector = new PayloadSpanCollector();
+ final PayloadChecker collector = new PayloadChecker();
Spans matchSpans = matchWeight.getSpans(context, requiredPostings.atLeast(Postings.PAYLOADS));
return (matchSpans == null) ? null : new FilterSpans(matchSpans) {
@Override
protected AcceptStatus accept(Spans candidate) throws IOException {
collector.reset();
candidate.collect(collector);
- return checkPayloads(collector.getPayloads());
+ return collector.match();
}
};
}
@@ -118,27 +119,42 @@ public class SpanPayloadCheckQuery exten
}
}
- /**
- * Check to see if the collected payloads match the required set.
- *
- * @param candidate a collection of payloads from the current Spans
- * @return whether or not the payloads match
- */
- protected AcceptStatus checkPayloads(Collection<byte[]> candidate) {
- if (candidate.size() == payloadToMatch.size()){
- //TODO: check the byte arrays are the same
- Iterator<byte[]> toMatchIter = payloadToMatch.iterator();
- //check each of the byte arrays, in order
- for (byte[] candBytes : candidate) {
- //if one is a mismatch, then return false
- if (Arrays.equals(candBytes, toMatchIter.next()) == false){
- return AcceptStatus.NO;
- }
+ private class PayloadChecker implements SpanCollector {
+
+ int upto = 0;
+ boolean matches = true;
+
+ @Override
+ public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
+ if (!matches)
+ return;
+ if (upto >= payloadToMatch.size()) {
+ matches = false;
+ return;
+ }
+ BytesRef payload = postings.getPayload();
+ if (payloadToMatch.get(upto) == null) {
+ matches = payload == null;
+ upto++;
+ return;
}
- //we've verified all the bytes
- return AcceptStatus.YES;
- } else {
- return AcceptStatus.NO;
+ if (payload == null) {
+ matches = false;
+ upto++;
+ return;
+ }
+ matches = payloadToMatch.get(upto).bytesEquals(payload);
+ upto++;
+ }
+
+ AcceptStatus match() {
+ return matches && upto == payloadToMatch.size() ? AcceptStatus.YES : AcceptStatus.NO;
+ }
+
+ @Override
+ public void reset() {
+ this.upto = 0;
+ this.matches = true;
}
}
@@ -148,8 +164,8 @@ public class SpanPayloadCheckQuery exten
buffer.append("spanPayCheck(");
buffer.append(match.toString(field));
buffer.append(", payloadRef: ");
- for (byte[] bytes : payloadToMatch) {
- ToStringUtils.byteArray(buffer, bytes);
+ for (BytesRef bytes : payloadToMatch) {
+ buffer.append(bytes.utf8ToString());
buffer.append(';');
}
buffer.append(")");
Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadCheckQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadCheckQuery.java?rev=1702872&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadCheckQuery.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/search/payloads/TestPayloadCheckQuery.java Mon Sep 14 08:52:38 2015
@@ -0,0 +1,192 @@
+package org.apache.lucene.search.payloads;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.SimplePayloadFilter;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.CheckHits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanPositionRangeQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.English;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+/** basic test of payload-spans */
+public class TestPayloadCheckQuery extends LuceneTestCase {
+ private static IndexSearcher searcher;
+ private static IndexReader reader;
+ private static Directory directory;
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ Analyzer simplePayloadAnalyzer = new Analyzer() {
+ @Override
+ public TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
+ return new TokenStreamComponents(tokenizer, new SimplePayloadFilter(tokenizer));
+ }
+ };
+
+ directory = newDirectory();
+ RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
+ newIndexWriterConfig(simplePayloadAnalyzer)
+ .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy()));
+ //writer.infoStream = System.out;
+ for (int i = 0; i < 2000; i++) {
+ Document doc = new Document();
+ doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES));
+ writer.addDocument(doc);
+ }
+ reader = writer.getReader();
+ searcher = newSearcher(reader);
+ writer.close();
+ }
+
+ @AfterClass
+ public static void afterClass() throws Exception {
+ reader.close();
+ directory.close();
+ searcher = null;
+ reader = null;
+ directory = null;
+ }
+
+ private void checkHits(Query query, int[] results) throws IOException {
+ CheckHits.checkHits(random(), query, "field", searcher, results);
+ }
+
+ public void testSpanPayloadCheck() throws Exception {
+ SpanQuery term1 = new SpanTermQuery(new Term("field", "five"));
+ BytesRef pay = new BytesRef("pos: " + 5);
+ SpanQuery query = new SpanPayloadCheckQuery(term1, Collections.singletonList(pay));
+ checkHits(query, new int[]
+ {1125, 1135, 1145, 1155, 1165, 1175, 1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345, 1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495, 1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665, 1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835, 1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985, 1995});
+ assertTrue(searcher.explain(query, 1125).getValue() > 0.0f);
+
+ SpanTermQuery term2 = new SpanTermQuery(new Term("field", "hundred"));
+ SpanNearQuery snq;
+ SpanQuery[] clauses;
+ List<BytesRef> list;
+ BytesRef pay2;
+ clauses = new SpanQuery[2];
+ clauses[0] = term1;
+ clauses[1] = term2;
+ snq = new SpanNearQuery(clauses, 0, true);
+ pay = new BytesRef("pos: " + 0);
+ pay2 = new BytesRef("pos: " + 1);
+ list = new ArrayList<>();
+ list.add(pay);
+ list.add(pay2);
+ query = new SpanPayloadCheckQuery(snq, list);
+ checkHits(query, new int[]
+ {500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599});
+ clauses = new SpanQuery[3];
+ clauses[0] = term1;
+ clauses[1] = term2;
+ clauses[2] = new SpanTermQuery(new Term("field", "five"));
+ snq = new SpanNearQuery(clauses, 0, true);
+ pay = new BytesRef("pos: " + 0);
+ pay2 = new BytesRef("pos: " + 1);
+ BytesRef pay3 = new BytesRef("pos: " + 2);
+ list = new ArrayList<>();
+ list.add(pay);
+ list.add(pay2);
+ list.add(pay3);
+ query = new SpanPayloadCheckQuery(snq, list);
+ checkHits(query, new int[]
+ {505});
+ }
+
+ public void testUnorderedPayloadChecks() throws Exception {
+
+ SpanTermQuery term5 = new SpanTermQuery(new Term("field", "five"));
+ SpanTermQuery term100 = new SpanTermQuery(new Term("field", "hundred"));
+ SpanTermQuery term4 = new SpanTermQuery(new Term("field", "four"));
+ SpanNearQuery nearQuery = new SpanNearQuery(new SpanQuery[]{term5, term100, term4}, 0, false);
+
+ List<BytesRef> payloads = new ArrayList<>();
+ payloads.add(new BytesRef("pos: " + 2));
+ payloads.add(new BytesRef("pos: " + 1));
+ payloads.add(new BytesRef("pos: " + 0));
+
+ SpanPayloadCheckQuery payloadQuery = new SpanPayloadCheckQuery(nearQuery, payloads);
+ checkHits(payloadQuery, new int[]{ 405 });
+
+ payloads.clear();
+ payloads.add(new BytesRef("pos: " + 0));
+ payloads.add(new BytesRef("pos: " + 1));
+ payloads.add(new BytesRef("pos: " + 2));
+
+ payloadQuery = new SpanPayloadCheckQuery(nearQuery, payloads);
+ checkHits(payloadQuery, new int[]{ 504 });
+
+ }
+
+ public void testComplexSpanChecks() throws Exception {
+ SpanTermQuery one = new SpanTermQuery(new Term("field", "one"));
+ SpanTermQuery thous = new SpanTermQuery(new Term("field", "thousand"));
+ //should be one position in between
+ SpanTermQuery hundred = new SpanTermQuery(new Term("field", "hundred"));
+ SpanTermQuery three = new SpanTermQuery(new Term("field", "three"));
+
+ SpanNearQuery oneThous = new SpanNearQuery(new SpanQuery[]{one, thous}, 0, true);
+ SpanNearQuery hundredThree = new SpanNearQuery(new SpanQuery[]{hundred, three}, 0, true);
+ SpanNearQuery oneThousHunThree = new SpanNearQuery(new SpanQuery[]{oneThous, hundredThree}, 1, true);
+ SpanQuery query;
+ //this one's too small
+ query = new SpanPositionRangeQuery(oneThousHunThree, 1, 2);
+ checkHits(query, new int[]{});
+ //this one's just right
+ query = new SpanPositionRangeQuery(oneThousHunThree, 0, 6);
+ checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903});
+
+ List<BytesRef> payloads = new ArrayList<>();
+ BytesRef pay = new BytesRef(("pos: " + 0).getBytes(StandardCharsets.UTF_8));
+ BytesRef pay2 = new BytesRef(("pos: " + 1).getBytes(StandardCharsets.UTF_8));
+ BytesRef pay3 = new BytesRef(("pos: " + 3).getBytes(StandardCharsets.UTF_8));
+ BytesRef pay4 = new BytesRef(("pos: " + 4).getBytes(StandardCharsets.UTF_8));
+ payloads.add(pay);
+ payloads.add(pay2);
+ payloads.add(pay3);
+ payloads.add(pay4);
+ query = new SpanPayloadCheckQuery(oneThousHunThree, payloads);
+ checkHits(query, new int[]{1103, 1203,1303,1403,1503,1603,1703,1803,1903});
+
+ }
+}
Modified: lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java?rev=1702872&r1=1702871&r2=1702872&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java Mon Sep 14 08:52:38 2015
@@ -17,6 +17,8 @@ package org.apache.lucene.search.highlig
* limitations under the License.
*/
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
@@ -29,9 +31,6 @@ import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CachingTokenFilter;
@@ -56,11 +55,9 @@ import org.apache.lucene.index.IndexRead
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.CommonTermsQuery;
-import org.apache.lucene.queries.CustomScoreProvider;
import org.apache.lucene.queries.CustomScoreQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
@@ -81,8 +78,8 @@ import org.apache.lucene.search.TermRang
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.SynonymTokenizer.TestHighlightRunner;
-import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.BitSetProducer;
+import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.join.ToChildBlockJoinQuery;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
@@ -1957,7 +1954,7 @@ public class HighlighterTest extends Bas
}
try (IndexReader reader = DirectoryReader.open(dir)) {
Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "words")),
- Collections.singleton("pos: 1".getBytes("UTF-8")));//just match the first "word" occurrence
+ Collections.singletonList(new BytesRef("pos: 1")));//just match the first "word" occurrence
IndexSearcher searcher = newSearcher(reader);
QueryScorer scorer = new QueryScorer(query, searcher.getIndexReader(), FIELD_NAME);
scorer.setUsePayloads(true);
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java?rev=1702872&r1=1702871&r2=1702872&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java Mon Sep 14 08:52:38 2015
@@ -30,6 +30,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.payloads.SpanPayloadCheckQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.util.BytesRef;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.handler.component.HighlightComponent;
@@ -1108,7 +1109,7 @@ public class HighlighterTest extends Sol
//Create query matching this payload
Query query = new SpanPayloadCheckQuery(new SpanTermQuery(new Term(FIELD_NAME, "word")),
- Collections.singleton(new byte[]{0,0,0,7}));//bytes for integer 7
+ Collections.singletonList(new BytesRef(new byte[]{0, 0, 0, 7})));//bytes for integer 7
//invoke highlight component... the hard way
final SearchComponent hlComp = h.getCore().getSearchComponent("highlight");