You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2016/09/09 15:04:54 UTC
lucene-solr:branch_5_5: LUCENE-7417: Highlighter WSTE didn't handle single-term MultiPhraseQuery. Also updated to Java 5 for-each in this method.

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_5_5 8655b97b2 -> 7350d14a9


LUCENE-7417: Highlighter WSTE didn't handle single-term MultiPhraseQuery.
Also updated to Java 5 for-each in this method.

(cherry picked from commit 9a8ff64)


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7350d14a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7350d14a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7350d14a

Branch: refs/heads/branch_5_5
Commit: 7350d14a9f8a2bedf07d8ae7b89cf67884ca879e
Parents: 8655b97
Author: David Smiley <ds...@apache.org>
Authored: Fri Sep 9 10:06:39 2016 -0400
Committer: David Smiley <ds...@apache.org>
Committed: Fri Sep 9 11:04:48 2016 -0400

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  9 ++++--
 .../highlight/WeightedSpanTermExtractor.java    | 26 +++++++++--------
 .../search/highlight/HighlighterTest.java       | 30 +++++++++++++-------
 3 files changed, 40 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7350d14a/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 3c8c621..fa99930 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -4,7 +4,12 @@ For more information on past and future Lucene versions, please see:
 http://s.apache.org/luceneversions
 
 ======================= Lucene 5.5.3 =======================
-(No Changes)
+
+Bug Fixes
+
+* LUCENE-7417: The standard Highlighter could throw an IllegalArgumentException when
+  trying to highlight a query containing a degenerate case of a MultiPhraseQuery with one
+  term.  (Thomas Kappler via David Smiley)
 
 ======================= Lucene 5.5.2 =======================
 
@@ -59,7 +64,7 @@ Bug fixes
 
 * LUCENE-7188: remove incorrect sanity check in NRTCachingDirectory.listAll()
   that led to IllegalStateException being thrown when nothing was wrong.
-  (David Smiley, yonik)  
+  (David Smiley, yonik)
 
 * LUCENE-7209: Fixed explanations of FunctionScoreQuery. (Adrien Grand)
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7350d14a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
index 1cc8f99..52456e8 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
@@ -118,8 +118,7 @@ public class WeightedSpanTermExtractor {
       Term[] phraseQueryTerms = phraseQuery.getTerms();
       if (phraseQueryTerms.length == 1) {
         extractWeightedSpanTerms(terms, new SpanTermQuery(phraseQueryTerms[0]), boost);
-      }
-      else {
+      } else {
         SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
         for (int i = 0; i < phraseQueryTerms.length; i++) {
           clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
@@ -155,8 +154,8 @@ public class WeightedSpanTermExtractor {
       // this query is TermContext sensitive.
       extractWeightedTerms(terms, query, boost);
     } else if (query instanceof DisjunctionMaxQuery) {
-      for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
-        extract(iterator.next(), boost, terms);
+      for (Query clause : ((DisjunctionMaxQuery) query)) {
+        extract(clause, boost, terms);
       }
     } else if (query instanceof ToParentBlockJoinQuery) {
       extract(((ToParentBlockJoinQuery) query).getChildQuery(), boost, terms);
@@ -186,16 +185,15 @@ public class WeightedSpanTermExtractor {
             disjuncts = (disjunctLists[positions[i]] = new ArrayList<>(termArray.length));
             ++distinctPositions;
           }
-          for (int j = 0; j < termArray.length; ++j) {
-            disjuncts.add(new SpanTermQuery(termArray[j]));
+          for (Term aTermArray : termArray) {
+            disjuncts.add(new SpanTermQuery(aTermArray));
           }
         }
 
         int positionGaps = 0;
         int position = 0;
         final SpanQuery[] clauses = new SpanQuery[distinctPositions];
-        for (int i = 0; i < disjunctLists.length; ++i) {
-          List<SpanQuery> disjuncts = disjunctLists[i];
+        for (List<SpanQuery> disjuncts : disjunctLists) {
           if (disjuncts != null) {
             clauses[position++] = new SpanOrQuery(disjuncts
                 .toArray(new SpanQuery[disjuncts.size()]));
@@ -204,11 +202,15 @@ public class WeightedSpanTermExtractor {
           }
         }
 
-        final int slop = mpq.getSlop();
-        final boolean inorder = (slop == 0);
+        if (clauses.length == 1) {
+          extractWeightedSpanTerms(terms, clauses[0], boost);
+        } else {
+          final int slop = mpq.getSlop();
+          final boolean inorder = (slop == 0);
 
-        SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
-        extractWeightedSpanTerms(terms, sp, boost);
+          SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder);
+          extractWeightedSpanTerms(terms, sp, boost);
+        }
       }
     } else if (query instanceof MatchAllDocsQuery) {
       //nothing

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7350d14a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
index 81db3de..54c7505 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
@@ -94,7 +94,6 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.automaton.Automata;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
-import org.junit.Test;
 import org.w3c.dom.Element;
 import org.w3c.dom.NodeList;
 
@@ -1565,30 +1564,39 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     helper.start();
   }
 
-  @Test
   public void testHighlighterWithPhraseQuery() throws IOException, InvalidTokenOffsetsException {
+    final String fieldName = "substring";
+
+    final PhraseQuery query = new PhraseQuery(fieldName, new BytesRef[] { new BytesRef("uchu") });
+
+    assertHighlighting(query, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
+  }
+
+  public void testHighlighterWithMultiPhraseQuery() throws IOException, InvalidTokenOffsetsException {
+    final String fieldName = "substring";
+
+    final MultiPhraseQuery mpq = new MultiPhraseQuery();
+    mpq.add(new Term(fieldName, "uchu"));
 
+    assertHighlighting(mpq, new SimpleHTMLFormatter("<b>", "</b>"), "Buchung", "B<b>uchu</b>ng", fieldName);
+  }
+
+  private void assertHighlighting(Query query, Formatter formatter, String text, String expected, String fieldName)
+      throws IOException, InvalidTokenOffsetsException {
     final Analyzer analyzer = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
         return new TokenStreamComponents(new NGramTokenizer(4, 4));
       }
     };
-    final String fieldName = "substring";
-
-    final List<BytesRef> list = new ArrayList<>();
-    list.add(new BytesRef("uchu"));
-    final PhraseQuery query = new PhraseQuery(fieldName, list.toArray(new BytesRef[list.size()]));
 
     final QueryScorer fragmentScorer = new QueryScorer(query, fieldName);
-    final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<b>", "</b>");
 
     final Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
     highlighter.setTextFragmenter(new SimpleFragmenter(100));
-    final String fragment = highlighter.getBestFragment(analyzer, fieldName, "Buchung");
-
-    assertEquals("B<b>uchu</b>ng",fragment);
+    final String fragment = highlighter.getBestFragment(analyzer, fieldName, text);
 
+    assertEquals(expected, fragment);
   }
 
   public void testUnRewrittenQuery() throws Exception {