You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2017/03/01 06:39:04 UTC

lucene-solr:master: LUCENE-7717: UnifiedHighlighter and PostingsHighlighter bug in PrefixQuery and TermRangeQuery for multi-byte text

Repository: lucene-solr
Updated Branches:
  refs/heads/master 0baf2fa33 -> ec13032a9


LUCENE-7717: UnifiedHighlighter and PostingsHighlighter bug in PrefixQuery and TermRangeQuery for multi-byte text


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ec13032a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ec13032a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ec13032a

Branch: refs/heads/master
Commit: ec13032a948a29f69d50d41e4859fd38ed5ca377
Parents: 0baf2fa
Author: David Smiley <ds...@apache.org>
Authored: Wed Mar 1 01:38:54 2017 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Wed Mar 1 01:38:54 2017 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 +++
 .../MultiTermHighlighting.java                  | 20 ++++++-------
 .../uhighlight/MultiTermHighlighting.java       | 20 ++++++-------
 .../uhighlight/TestUnifiedHighlighterMTQ.java   | 30 ++++++++++++++++----
 4 files changed, 49 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ec13032a/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 6026654..7d8e363 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -257,6 +257,10 @@ Bug Fixes
 * LUCENE-7676: Fixed FilterCodecReader to override more super-class methods.
   Also added TestFilterCodecReader class. (Christine Poerschke)
 
+* LUCENE-7717: The UnifiedHighlighter and PostingsHighlighter were not highlighting
+  prefix queries with multi-byte characters. TermRangeQuery is affected too.
+  (Dmitry Malinin, David Smiley)
+
 ======================= Lucene 6.4.1 =======================
 
 Build

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ec13032a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
index 56345c2..c9733d3 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
@@ -87,16 +87,6 @@ class MultiTermHighlighting {
       list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field)));
     } else if (query instanceof SpanMultiTermQueryWrapper) {
       list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field)));
-    } else if (query instanceof AutomatonQuery) {
-      final AutomatonQuery aq = (AutomatonQuery) query;
-      if (aq.getField().equals(field)) {
-        list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
-          @Override
-          public String toString() {
-            return aq.toString();
-          }
-        });
-      }
     } else if (query instanceof PrefixQuery) {
       final PrefixQuery pq = (PrefixQuery) query;
       Term prefix = pq.getPrefix();
@@ -182,6 +172,16 @@ class MultiTermHighlighting {
           }
         });
       }
+    } else if (query instanceof AutomatonQuery) {
+      final AutomatonQuery aq = (AutomatonQuery) query;
+      if (aq.getField().equals(field)) {
+        list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
+          @Override
+          public String toString() {
+            return aq.toString();
+          }
+        });
+      }
     }
     return list.toArray(new CharacterRunAutomaton[list.size()]);
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ec13032a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
index 267d603..89403d5 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
@@ -100,16 +100,6 @@ class MultiTermHighlighting {
     } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
       list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(),
           fieldMatcher, lookInSpan, preRewriteFunc)));
-    } else if (query instanceof AutomatonQuery) {
-      final AutomatonQuery aq = (AutomatonQuery) query;
-      if (fieldMatcher.test(aq.getField())) {
-        list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
-          @Override
-          public String toString() {
-            return aq.toString();
-          }
-        });
-      }
     } else if (query instanceof PrefixQuery) {
       final PrefixQuery pq = (PrefixQuery) query;
       Term prefix = pq.getPrefix();
@@ -197,6 +187,16 @@ class MultiTermHighlighting {
           }
         });
       }
+    } else if (query instanceof AutomatonQuery) {
+      final AutomatonQuery aq = (AutomatonQuery) query;
+      if (fieldMatcher.test(aq.getField())) {
+        list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
+          @Override
+          public String toString() {
+            return aq.toString();
+          }
+        });
+      }
     }
     return list.toArray(new CharacterRunAutomaton[list.size()]);
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ec13032a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
index 10f36a7..4a4b7ed 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
@@ -668,10 +669,11 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
 
     IndexSearcher searcher = newSearcher(ir);
     UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+    // use a variety of common MTQ types
     BooleanQuery query = new BooleanQuery.Builder()
-        .add(new WildcardQuery(new Term("body", "te*")), BooleanClause.Occur.SHOULD)
-        .add(new WildcardQuery(new Term("body", "one")), BooleanClause.Occur.SHOULD)
-        .add(new WildcardQuery(new Term("body", "se*")), BooleanClause.Occur.SHOULD)
+        .add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD)
+        .add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD)
+        .add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD)
         .build();
     TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
     assertEquals(1, topDocs.totalHits);
@@ -732,8 +734,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
     snippets = highlighter.highlight("body", query, topDocs);
     assertEquals(1, snippets.length);
 
-    // Default formatter bolds each hit:
-    assertEquals("<b>Test(body:te*)</b> a <b>one(body:one)</b> <b>sentence(body:se*)</b> document.", snippets[0]);
+    assertEquals("<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.", snippets[0]);
 
     ir.close();
   }
@@ -1054,4 +1055,23 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
     }
   }
 
+  // LUCENE-7717 bug, ordering of MTQ AutomatonQuery detection
+  public void testRussianPrefixQuery() throws IOException {
+    Analyzer analyzer = new StandardAnalyzer();
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
+    String field = "title";
+    Document doc = new Document();
+    doc.add(new Field(field, "\u044f", fieldType)); // Russian char; uses 2 UTF8 bytes
+    iw.addDocument(doc);
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    Query query = new PrefixQuery(new Term(field, "\u044f"));
+    TopDocs topDocs = searcher.search(query, 1);
+    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer);
+    String[] snippets = highlighter.highlight(field, query, topDocs);
+    assertEquals("[<b>\u044f</b>]", Arrays.toString(snippets));
+    ir.close();
+  }
 }