You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2017/03/01 06:39:04 UTC
lucene-solr:master: LUCENE-7717: UnifiedHighlighter and PostingsHighlighter bug in PrefixQuery and TermRangeQuery for multi-byte text
Repository: lucene-solr
Updated Branches:
refs/heads/master 0baf2fa33 -> ec13032a9
LUCENE-7717: UnifiedHighlighter and PostingsHighlighter bug in PrefixQuery and TermRangeQuery for multi-byte text
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ec13032a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ec13032a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ec13032a
Branch: refs/heads/master
Commit: ec13032a948a29f69d50d41e4859fd38ed5ca377
Parents: 0baf2fa
Author: David Smiley <ds...@apache.org>
Authored: Wed Mar 1 01:38:54 2017 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Wed Mar 1 01:38:54 2017 -0500
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 +++
.../postingshighlight/MultiTermHighlighting.java | 20 ++++++-------
.../uhighlight/MultiTermHighlighting.java | 20 ++++++-------
.../uhighlight/TestUnifiedHighlighterMTQ.java | 30 ++++++++++++++++----
4 files changed, 49 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ec13032a/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 6026654..7d8e363 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -257,6 +257,10 @@ Bug Fixes
* LUCENE-7676: Fixed FilterCodecReader to override more super-class methods.
Also added TestFilterCodecReader class. (Christine Poerschke)
+* LUCENE-7717: The UnifiedHighlighter and PostingsHighlighter were not highlighting
+ prefix queries with multi-byte characters. TermRangeQuery is affected too.
+ (Dmitry Malinin, David Smiley)
+
======================= Lucene 6.4.1 =======================
Build
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ec13032a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
index 56345c2..c9733d3 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/postingshighlight/MultiTermHighlighting.java
@@ -87,16 +87,6 @@ class MultiTermHighlighting {
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field)));
} else if (query instanceof SpanMultiTermQueryWrapper) {
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field)));
- } else if (query instanceof AutomatonQuery) {
- final AutomatonQuery aq = (AutomatonQuery) query;
- if (aq.getField().equals(field)) {
- list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
- @Override
- public String toString() {
- return aq.toString();
- }
- });
- }
} else if (query instanceof PrefixQuery) {
final PrefixQuery pq = (PrefixQuery) query;
Term prefix = pq.getPrefix();
@@ -182,6 +172,16 @@ class MultiTermHighlighting {
}
});
}
+ } else if (query instanceof AutomatonQuery) {
+ final AutomatonQuery aq = (AutomatonQuery) query;
+ if (aq.getField().equals(field)) {
+ list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
+ @Override
+ public String toString() {
+ return aq.toString();
+ }
+ });
+ }
}
return list.toArray(new CharacterRunAutomaton[list.size()]);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ec13032a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
index 267d603..89403d5 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
@@ -100,16 +100,6 @@ class MultiTermHighlighting {
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(),
fieldMatcher, lookInSpan, preRewriteFunc)));
- } else if (query instanceof AutomatonQuery) {
- final AutomatonQuery aq = (AutomatonQuery) query;
- if (fieldMatcher.test(aq.getField())) {
- list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
- @Override
- public String toString() {
- return aq.toString();
- }
- });
- }
} else if (query instanceof PrefixQuery) {
final PrefixQuery pq = (PrefixQuery) query;
Term prefix = pq.getPrefix();
@@ -197,6 +187,16 @@ class MultiTermHighlighting {
}
});
}
+ } else if (query instanceof AutomatonQuery) {
+ final AutomatonQuery aq = (AutomatonQuery) query;
+ if (fieldMatcher.test(aq.getField())) {
+ list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
+ @Override
+ public String toString() {
+ return aq.toString();
+ }
+ });
+ }
}
return list.toArray(new CharacterRunAutomaton[list.size()]);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ec13032a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
index 10f36a7..4a4b7ed 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -668,10 +669,11 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+ // use a variety of common MTQ types
BooleanQuery query = new BooleanQuery.Builder()
- .add(new WildcardQuery(new Term("body", "te*")), BooleanClause.Occur.SHOULD)
- .add(new WildcardQuery(new Term("body", "one")), BooleanClause.Occur.SHOULD)
- .add(new WildcardQuery(new Term("body", "se*")), BooleanClause.Occur.SHOULD)
+ .add(new PrefixQuery(new Term("body", "te")), BooleanClause.Occur.SHOULD)
+ .add(new WildcardQuery(new Term("body", "*one*")), BooleanClause.Occur.SHOULD)
+ .add(new FuzzyQuery(new Term("body", "zentence~")), BooleanClause.Occur.SHOULD)
.build();
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
@@ -732,8 +734,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
snippets = highlighter.highlight("body", query, topDocs);
assertEquals(1, snippets.length);
- // Default formatter bolds each hit:
- assertEquals("<b>Test(body:te*)</b> a <b>one(body:one)</b> <b>sentence(body:se*)</b> document.", snippets[0]);
+ assertEquals("<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.", snippets[0]);
ir.close();
}
@@ -1054,4 +1055,23 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
}
}
+ // LUCENE-7717 bug, ordering of MTQ AutomatonQuery detection
+ public void testRussianPrefixQuery() throws IOException {
+ Analyzer analyzer = new StandardAnalyzer();
+ RandomIndexWriter iw = new RandomIndexWriter(random(), dir, analyzer);
+ String field = "title";
+ Document doc = new Document();
+ doc.add(new Field(field, "\u044f", fieldType)); // Russian char; uses 2 UTF8 bytes
+ iw.addDocument(doc);
+ IndexReader ir = iw.getReader();
+ iw.close();
+
+ IndexSearcher searcher = newSearcher(ir);
+ Query query = new PrefixQuery(new Term(field, "\u044f"));
+ TopDocs topDocs = searcher.search(query, 1);
+ UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer);
+ String[] snippets = highlighter.highlight(field, query, topDocs);
+ assertEquals("[<b>\u044f</b>]", Arrays.toString(snippets));
+ ir.close();
+ }
}