You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text version.
Posted to commits@lucene.apache.org by sa...@apache.org on 2016/06/15 22:03:04 UTC
[1/2] lucene-solr:branch_5_5: LUCENE-7231: WeightedSpanTermExtractor
correctly deals with single-term PhraseQuery
Repository: lucene-solr
Updated Branches:
refs/heads/branch_5_5 3e5832291 -> 90e823ed3
refs/heads/branch_5x fa9940b3e -> c92703d38
LUCENE-7231: WeightedSpanTermExtractor correctly deals with single-term PhraseQuery
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/90e823ed
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/90e823ed
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/90e823ed
Branch: refs/heads/branch_5_5
Commit: 90e823ed37edcce3984296ba6f16654d47f65d64
Parents: 3e58322
Author: Alan Woodward <ro...@apache.org>
Authored: Tue May 17 14:08:45 2016 +0100
Committer: Steve Rowe <sa...@apache.org>
Committed: Wed Jun 15 17:59:25 2016 -0400
----------------------------------------------------------------------
lucene/CHANGES.txt | 23 ++++++++------
.../highlight/WeightedSpanTermExtractor.java | 33 +++++++++++---------
.../search/highlight/HighlighterTest.java | 28 +++++++++++++++++
3 files changed, 60 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/90e823ed/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index c7d23b7..3c5ef91 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -10,6 +10,19 @@ Bug Fixes
* LUCENE-7187: Block join queries' Weight#extractTerms(...) implementations
should delegate to the wrapped weight. (Martijn van Groningen)
+* LUCENE-7279: JapaneseTokenizer throws ArrayIndexOutOfBoundsException
+ on some valid inputs (Mike McCandless)
+
+* LUCENE-7219: Make queryparser/xml (Point|LegacyNumeric)RangeQuery builders
+ match the underlying queries' (lower|upper)Term optionality logic.
+ (Kaneshanathan Srivisagan, Christine Poerschke)
+
+* LUCENE-7284: GapSpans needs to implement positionsCost(). (Daniel Bigham, Alan
+ Woodward)
+
+* LUCENE-7231: WeightedSpanTermExtractor didn't deal correctly with single-term
+ phrase queries. (Eva Popenda, Alan Woodward)
+
======================= Lucene 5.5.1 =======================
Bug fixes
@@ -23,16 +36,6 @@ Bug fixes
* LUCENE-7209: Fixed explanations of FunctionScoreQuery. (Adrien Grand)
-* LUCENE-7279: JapaneseTokenizer throws ArrayIndexOutOfBoundsException
- on some valid inputs (Mike McCandless)
-
-* LUCENE-7219: Make queryparser/xml (Point|LegacyNumeric)RangeQuery builders
- match the underlying queries' (lower|upper)Term optionality logic.
- (Kaneshanathan Srivisagan, Christine Poerschke)
-
-* LUCENE-7284: GapSpans needs to implement positionsCost(). (Daniel Bigham, Alan
- Woodward)
-
======================= Lucene 5.5.0 =======================
New Features
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/90e823ed/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
index dc4e50a..1cc8f99 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
@@ -116,24 +116,29 @@ public class WeightedSpanTermExtractor {
} else if (query instanceof PhraseQuery) {
PhraseQuery phraseQuery = ((PhraseQuery) query);
Term[] phraseQueryTerms = phraseQuery.getTerms();
- SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
- for (int i = 0; i < phraseQueryTerms.length; i++) {
- clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
+ if (phraseQueryTerms.length == 1) {
+ extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
}
+ else {
+ SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
+ for (int i = 0; i < phraseQueryTerms.length; i++) {
+ clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
+ }
- // sum position increments beyond 1
- int positionGaps = 0;
- int[] positions = phraseQuery.getPositions();
- if (positions.length >= 2) {
- // positions are in increasing order. max(0,...) is just a safeguard.
- positionGaps = Math.max(0, positions[positions.length-1] - positions[0] - positions.length + 1);
- }
+ // sum position increments beyond 1
+ int positionGaps = 0;
+ int[] positions = phraseQuery.getPositions();
+ if (positions.length >= 2) {
+ // positions are in increasing order. max(0,...) is just a safeguard.
+ positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
+ }
- //if original slop is 0 then require inOrder
- boolean inorder = (phraseQuery.getSlop() == 0);
+ //if original slop is 0 then require inOrder
+ boolean inorder = (phraseQuery.getSlop() == 0);
- SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
- extractWeightedSpanTerms(terms, sp, boost);
+ SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
+ extractWeightedSpanTerms(terms, sp, boost);
+ }
} else if (query instanceof TermQuery) {
extractWeightedTerms(terms, query, boost);
} else if (query instanceof SpanQuery) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/90e823ed/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
index 6d614f6..81db3de 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
@@ -40,6 +40,7 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -93,6 +94,7 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
+import org.junit.Test;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
@@ -1563,6 +1565,32 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
helper.start();
}
+ @Test
+ public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
+
+ final Analyzer analyzer = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ return new TokenStreamComponents(new NGramTokenizer(4, 4));
+ }
+ };
+ final String fieldName = "substring";
+
+ final List<BytesRef> list = new ArrayList<>();
+ list.add(new BytesRef("uchu"));
+ final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
+
+ final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
+ final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
+
+ final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
+ highlighter.setTextFragmenter(new SimpleFragmenter(100));
+ final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
+
+ assertEquals("B<b>uchu</b>ng",fragment);
+
+ }
+
public void testUnRewrittenQuery() throws Exception {
final TestHighlightRunner helper = new TestHighlightRunner() {
[2/2] lucene-solr:branch_5x: LUCENE-7231: WeightedSpanTermExtractor
correctly deals with single-term PhraseQuery
Posted by sa...@apache.org.
LUCENE-7231: WeightedSpanTermExtractor correctly deals with single-term PhraseQuery
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c92703d3
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c92703d3
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c92703d3
Branch: refs/heads/branch_5x
Commit: c92703d3875bf8a47ff828d5910f78772e3841af
Parents: fa9940b
Author: Alan Woodward <ro...@apache.org>
Authored: Tue May 17 14:08:45 2016 +0100
Committer: Steve Rowe <sa...@apache.org>
Committed: Wed Jun 15 18:02:49 2016 -0400
----------------------------------------------------------------------
.../highlight/WeightedSpanTermExtractor.java | 33 +++++++++++---------
.../search/highlight/HighlighterTest.java | 28 +++++++++++++++++
2 files changed, 47 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c92703d3/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
index 85bf79a..b527172 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
@@ -116,24 +116,29 @@ public class WeightedSpanTermExtractor {
} else if (query instanceof PhraseQuery) {
PhraseQuery phraseQuery = ((PhraseQuery) query);
Term[] phraseQueryTerms = phraseQuery.getTerms();
- SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
- for (int i = 0; i < phraseQueryTerms.length; i++) {
- clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
+ if (phraseQueryTerms.length == 1) {
+ extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
}
+ else {
+ SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
+ for (int i = 0; i < phraseQueryTerms.length; i++) {
+ clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
+ }
- // sum position increments beyond 1
- int positionGaps = 0;
- int[] positions = phraseQuery.getPositions();
- if (positions.length >= 2) {
- // positions are in increasing order. max(0,...) is just a safeguard.
- positionGaps = Math.max(0, positions[positions.length-1] - positions[0] - positions.length + 1);
- }
+ // sum position increments beyond 1
+ int positionGaps = 0;
+ int[] positions = phraseQuery.getPositions();
+ if (positions.length >= 2) {
+ // positions are in increasing order. max(0,...) is just a safeguard.
+ positionGaps = Math.max(0, positions[positions.length - 1] - positions[0] - positions.length + 1);
+ }
- //if original slop is 0 then require inOrder
- boolean inorder = (phraseQuery.getSlop() == 0);
+ //if original slop is 0 then require inOrder
+ boolean inorder = (phraseQuery.getSlop() == 0);
- SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
- extractWeightedSpanTerms(terms, sp, boost);
+ SpanNearQuery sp = new SpanNearQuery(clauses, phraseQuery.getSlop() + positionGaps, inorder);
+ extractWeightedSpanTerms(terms, sp, boost);
+ }
} else if (query instanceof TermQuery) {
extractWeightedTerms(terms, query, boost);
} else if (query instanceof SpanQuery) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c92703d3/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
index 6d614f6..81db3de 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
@@ -40,6 +40,7 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@@ -93,6 +94,7 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
+import org.junit.Test;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
@@ -1563,6 +1565,32 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
helper.start();
}
+ @Test
+ public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
+
+ final Analyzer analyzer = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ return new TokenStreamComponents(new NGramTokenizer(4, 4));
+ }
+ };
+ final String fieldName = "substring";
+
+ final List<BytesRef> list = new ArrayList<>();
+ list.add(new BytesRef("uchu"));
+ final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
+
+ final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
+ final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
+
+ final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
+ highlighter.setTextFragmenter(new SimpleFragmenter(100));
+ final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
+
+ assertEquals("B<b>uchu</b>ng",fragment);
+
+ }
+
public void testUnRewrittenQuery() throws Exception {
final TestHighlightRunner helper = new TestHighlightRunner() {