You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2019/06/21 21:06:05 UTC
[lucene-solr] branch master updated: LUCENE-8848 LUCENE-7757 LUCENE-8492: UnifiedHighlighter.hasUnrecognizedQuery The UH now detects that parts of the query are not understood by it. When found, it highlights more safely/reliably. Fixes compatibility with complex and surround query parsers.

This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new 54cc701  LUCENE-8848 LUCENE-7757 LUCENE-8492: UnifiedHighlighter.hasUnrecognizedQuery The UH now detects that parts of the query are not understood by it. When found, it highlights more safely/reliably. Fixes compatibility with complex and surround query parsers.
54cc701 is described below

commit 54cc70127b22083198f1c44f83ccf4cdf769ac77
Author: David Smiley <ds...@apache.org>
AuthorDate: Fri Jun 21 17:05:56 2019 -0400

    LUCENE-8848 LUCENE-7757 LUCENE-8492: UnifiedHighlighter.hasUnrecognizedQuery
    The UH now detects that parts of the query are not understood by it.
    When found, it highlights more safely/reliably.
    Fixes compatibility with complex and surround query parsers.
---
 lucene/CHANGES.txt                                 |   5 +
 .../uhighlight/MemoryIndexOffsetStrategy.java      |   9 +-
 .../search/uhighlight/MultiTermHighlighting.java   |   8 ++
 .../search/uhighlight/NoOpOffsetStrategy.java      |   2 +-
 .../uhighlight/TokenStreamOffsetStrategy.java      |  16 +---
 .../lucene/search/uhighlight/UHComponents.java     |   8 +-
 .../search/uhighlight/UnifiedHighlighter.java      |  66 +++++++++----
 .../search/uhighlight/TestUnifiedHighlighter.java  | 103 +++++++++++++++++++++
 .../TestUnifiedHighlighterExtensibility.java       |  33 ++++---
 .../solr/highlight/TestUnifiedSolrHighlighter.java |  12 +++
 10 files changed, 218 insertions(+), 44 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 2216d69..1892609 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -109,6 +109,11 @@ Improvements
 * LUCENE-8845: Allow Intervals.prefix() and Intervals.wildcard() to specify
   their maximum allowed expansions (Alan Woodward)
 
+* LUCENE-8848 LUCENE-7757 LUCENE-8492: The UnifiedHighlighter now detects that parts of the query are not understood by
+  it, and thus it should not make optimizations that result in no highlights or slow highlighting.  This generally works
+  best for WEIGHT_MATCHES mode.  Consequently queries produced by ComplexPhraseQueryParser and the surround QueryParser
+  will now highlight correctly. (David Smiley)
+
 Optimizations
 
 * LUCENE-8796: Use exponential search instead of binary search in
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
index 1f4455f..e53d6e4 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
@@ -57,6 +57,11 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
    * Build one {@link CharacterRunAutomaton} matching any term the query might match.
    */
   private static CharacterRunAutomaton buildCombinedAutomaton(UHComponents components) {
+    // We don't know enough about the query to do this confidently
+    if (components.getTerms() == null || components.getAutomata() == null) {
+      return null;
+    }
+
     List<CharacterRunAutomaton> allAutomata = new ArrayList<>();
     if (components.getTerms().length > 0) {
       allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(components.getTerms()))));
@@ -93,7 +98,9 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
     TokenStream tokenStream = tokenStream(content);
 
     // Filter the tokenStream to applicable terms
-    tokenStream = newKeepWordFilter(tokenStream, preMemIndexFilterAutomaton);
+    if (preMemIndexFilterAutomaton != null) {
+      tokenStream = newKeepWordFilter(tokenStream, preMemIndexFilterAutomaton);
+    }
     memoryIndex.reset();
     memoryIndex.addField(getField(), tokenStream);//note: calls tokenStream.reset() & close()
 
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
index d079599..8181c26 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
@@ -53,6 +53,14 @@ final class MultiTermHighlighting {
     return collector.runAutomata.toArray(new CharacterRunAutomaton[0]);
   }
 
+  /**
+   * Indicates if the the leaf query (from {@link QueryVisitor#visitLeaf(Query)}) is a type of query that
+   * we can extract automata from.
+   */
+  public static boolean canExtractAutomataFromLeafQuery(Query query) {
+    return query instanceof AutomatonQuery || query instanceof FuzzyQuery;
+  }
+
   private static class AutomataCollector extends QueryVisitor {
 
     List<CharacterRunAutomaton> runAutomata = new ArrayList<>();
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
index 80528ce..08f2b12 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/NoOpOffsetStrategy.java
@@ -34,7 +34,7 @@ public class NoOpOffsetStrategy extends FieldOffsetStrategy {
   public static final NoOpOffsetStrategy INSTANCE = new NoOpOffsetStrategy();
 
   private NoOpOffsetStrategy() {
-    super(new UHComponents("_ignored_", (s) -> false, new MatchNoDocsQuery(), new BytesRef[0], PhraseHelper.NONE, new CharacterRunAutomaton[0], Collections.emptySet()));
+    super(new UHComponents("_ignored_", (s) -> false, new MatchNoDocsQuery(), new BytesRef[0], PhraseHelper.NONE, new CharacterRunAutomaton[0], false, Collections.emptySet()));
   }
 
   @Override
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
index 677ee4b..a7282b6 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/TokenStreamOffsetStrategy.java
@@ -34,21 +34,15 @@ import org.apache.lucene.util.automaton.CharacterRunAutomaton;
  */
 public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
 
-  private static final BytesRef[] ZERO_LEN_BYTES_REF_ARRAY = new BytesRef[0];
+  private final CharacterRunAutomaton[] combinedAutomata;
 
   public TokenStreamOffsetStrategy(UHComponents components, Analyzer indexAnalyzer) {
-    super(new UHComponents(
-            components.getField(),
-            components.getFieldMatcher(),
-            components.getQuery(),
-            ZERO_LEN_BYTES_REF_ARRAY,
-            components.getPhraseHelper(),
-            convertTermsToAutomata(components.getTerms(), components.getAutomata()),
-            components.getHighlightFlags()),
-        indexAnalyzer);
+    super(components, indexAnalyzer);
     assert components.getPhraseHelper().hasPositionSensitivity() == false;
+    combinedAutomata = convertTermsToAutomata(components.getTerms(), components.getAutomata());
   }
 
+  //TODO this is inefficient; instead build a union automata just for terms part.
   private static CharacterRunAutomaton[] convertTermsToAutomata(BytesRef[] terms, CharacterRunAutomaton[] automata) {
     CharacterRunAutomaton[] newAutomata = new CharacterRunAutomaton[terms.length + automata.length];
     for (int i = 0; i < terms.length; i++) {
@@ -67,7 +61,7 @@ public class TokenStreamOffsetStrategy extends AnalysisOffsetStrategy {
 
   @Override
   public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException {
-    return new TokenStreamOffsetsEnum(tokenStream(content), components.getAutomata());
+    return new TokenStreamOffsetsEnum(tokenStream(content), combinedAutomata);
   }
 
   private static class TokenStreamOffsetsEnum extends OffsetsEnum {
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java
index eed1e9c..4af6d70 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UHComponents.java
@@ -36,17 +36,19 @@ public class UHComponents {
   private final BytesRef[] terms; // Query: all terms we extracted (some may be position sensitive)
   private final PhraseHelper phraseHelper; // Query: position-sensitive information
   private final CharacterRunAutomaton[] automata; // Query: wildcards (i.e. multi-term query), not position sensitive
+  private final boolean hasUnrecognizedQueryPart; // Query: if part of the query (other than the extracted terms / automata) is a leaf we don't know
   private final Set<UnifiedHighlighter.HighlightFlag> highlightFlags;
 
   public UHComponents(String field, Predicate<String> fieldMatcher, Query query,
                       BytesRef[] terms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata,
-                      Set<UnifiedHighlighter.HighlightFlag> highlightFlags) {
+                      boolean hasUnrecognizedQueryPart, Set<UnifiedHighlighter.HighlightFlag> highlightFlags) {
     this.field = field;
     this.fieldMatcher = fieldMatcher;
     this.query = query;
     this.terms = terms;
     this.phraseHelper = phraseHelper;
     this.automata = automata;
+    this.hasUnrecognizedQueryPart = hasUnrecognizedQueryPart;
     this.highlightFlags = highlightFlags;
   }
 
@@ -74,6 +76,10 @@ public class UHComponents {
     return automata;
   }
 
+  public boolean hasUnrecognizedQueryPart() {
+    return hasUnrecognizedQueryPart;
+  }
+
   public Set<UnifiedHighlighter.HighlightFlag> getHighlightFlags() {
     return highlightFlags;
   }
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
index a29176a..e6c0742 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -51,6 +51,8 @@ import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MatchNoDocsQuery;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryVisitor;
@@ -750,13 +752,8 @@ public class UnifiedHighlighter {
   }
 
   protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
-    Predicate<String> fieldMatcher = getFieldMatcher(field);
-    BytesRef[] terms = filterExtractedTerms(fieldMatcher, allTerms);
-    Set<HighlightFlag> highlightFlags = getFlags(field);
-    PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
-    CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
-    OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
-    UHComponents components = new UHComponents(field, fieldMatcher, query, terms, phraseHelper, automata, highlightFlags);
+    UHComponents components = getHighlightComponents(field, query, allTerms);
+    OffsetSource offsetSource = getOptimizedOffsetSource(components);
     return new FieldHighlighter(field,
         getOffsetStrategy(offsetSource, components),
         new SplittingBreakIterator(getBreakIterator(field), UnifiedHighlighter.MULTIVAL_SEP_CHAR),
@@ -766,6 +763,41 @@ public class UnifiedHighlighter {
         getFormatter(field));
   }
 
+  protected UHComponents getHighlightComponents(String field, Query query, Set<Term> allTerms) {
+    Predicate<String> fieldMatcher = getFieldMatcher(field);
+    Set<HighlightFlag> highlightFlags = getFlags(field);
+    PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
+    boolean queryHasUnrecognizedPart = hasUnrecognizedQuery(fieldMatcher, query);
+    BytesRef[] terms = null;
+    CharacterRunAutomaton[] automata = null;
+    if (!highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES) || !queryHasUnrecognizedPart) {
+      terms = filterExtractedTerms(fieldMatcher, allTerms);
+      automata = getAutomata(field, query, highlightFlags);
+    } // otherwise don't need to extract
+    return new UHComponents(field, fieldMatcher, query, terms, phraseHelper, automata, queryHasUnrecognizedPart, highlightFlags);
+  }
+
+  protected boolean hasUnrecognizedQuery(Predicate<String> fieldMatcher, Query query) {
+    boolean[] hasUnknownLeaf = new boolean[1];
+    query.visit(new QueryVisitor() {
+      @Override
+      public boolean acceptField(String field) {
+        // checking hasUnknownLeaf is a trick to exit early
+        return hasUnknownLeaf[0] == false && fieldMatcher.test(field);
+      }
+
+      @Override
+      public void visitLeaf(Query query) {
+        if (MultiTermHighlighting.canExtractAutomataFromLeafQuery(query) == false) {
+          if (!(query instanceof MatchAllDocsQuery || query instanceof MatchNoDocsQuery)) {
+            hasUnknownLeaf[0] = true;
+          }
+        }
+      }
+    });
+    return hasUnknownLeaf[0];
+  }
+
   protected static BytesRef[] filterExtractedTerms(Predicate<String> fieldMatcher, Set<Term> queryTerms) {
     // Strip off the redundant field and sort the remaining terms
     SortedSet<BytesRef> filteredTerms = new TreeSet<>();
@@ -819,26 +851,26 @@ public class UnifiedHighlighter {
         : ZERO_LEN_AUTOMATA_ARRAY;
   }
 
-  protected OffsetSource getOptimizedOffsetSource(String field, BytesRef[] terms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata) {
-    OffsetSource offsetSource = getOffsetSource(field);
+  protected OffsetSource getOptimizedOffsetSource(UHComponents components) {
+    OffsetSource offsetSource = getOffsetSource(components.getField());
 
-    if (terms.length == 0 && automata.length == 0 && !phraseHelper.willRewrite()) {
+    // null automata means unknown, so assume a possibility
+    boolean mtqOrRewrite = components.getAutomata() == null || components.getAutomata().length > 0
+        || components.getPhraseHelper().willRewrite() || components.hasUnrecognizedQueryPart();
+
+    // null terms means unknown, so assume something to highlight
+    if (mtqOrRewrite == false && components.getTerms() != null && components.getTerms().length == 0) {
       return OffsetSource.NONE_NEEDED; //nothing to highlight
     }
 
     switch (offsetSource) {
       case POSTINGS:
-        if (phraseHelper.willRewrite()) {
-          // We can't choose the postings offset source when there is "rewriting" in the strict phrase
-          // processing (rare but possible). Postings requires knowing all the terms (except wildcards)
-          // up front.
-          return OffsetSource.ANALYSIS;
-        } else if (automata.length > 0) {
+        if (mtqOrRewrite) { // may need to see scan through all terms for the highlighted document efficiently
           return OffsetSource.ANALYSIS;
         }
         break;
       case POSTINGS_WITH_TERM_VECTORS:
-        if (!phraseHelper.willRewrite() && automata.length == 0) {
+        if (mtqOrRewrite == false) {
           return OffsetSource.POSTINGS; //We don't need term vectors
         }
         break;
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
index 3e2cc2e..2565e7e 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -48,10 +48,13 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryVisitor;
 import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.uhighlight.UnifiedHighlighter.HighlightFlag;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@@ -1357,4 +1360,104 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
 
     ir.close();
   }
+
+  public void testNotReanalyzed() throws Exception {
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+    if (fieldType == UHTestHelper.reanalysisType) {
+      return; // we're testing the *other* cases
+    }
+
+    Field body = new Field("body", "", fieldType);
+    Document doc = new Document();
+    doc.add(body);
+
+    body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
+    iw.addDocument(doc);
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        throw new AssertionError("shouldn't be called");
+      }
+    });
+    Query query = new TermQuery(new Term("body", "highlighting"));
+    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits.value);
+    String snippets[] = highlighter.highlight("body", query, topDocs);
+    assertEquals(1, snippets.length);
+    assertEquals("Just a test <b>highlighting</b> from postings. ", snippets[0]);
+
+    ir.close();
+  }
+
+  public void testUnknownQueryWithWeightMatches() throws IOException {
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+
+    Field body = new Field("body", "", fieldType);
+    Document doc = new Document();
+    doc.add(body);
+
+    body.setStringValue("Test a one sentence document.");
+    iw.addDocument(doc);
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer,
+        EnumSet.of(HighlightFlag.WEIGHT_MATCHES), null);
+    Query query = new BooleanQuery.Builder()
+        // simple term query body:one
+        .add(new TermQuery(new Term(body.name(), "one")), BooleanClause.Occur.MUST)
+        // a custom query, a leaf, that which matches body:sentence
+        //    Note this isn't even an MTQ.  What matters is that Weight.matches works.
+        .add(new Query() {
+          @Override
+          public String toString(String field) {
+            return "bogus";
+          }
+
+          @Override
+          public Query rewrite(IndexReader reader) {
+            return this;
+          }
+
+          // we don't visit terms, and we don't expose an automata.  Thus this appears as some unknown leaf.
+          @Override
+          public void visit(QueryVisitor visitor) {
+            if (visitor.acceptField(body.name())) {
+              visitor.visitLeaf(this);
+            }
+          }
+
+          @Override
+          public boolean equals(Object obj) {
+            return this == obj;
+          }
+
+          @Override
+          public int hashCode() {
+            return System.identityHashCode(this);
+          }
+
+          @Override
+          public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
+            //TODO maybe should loop through index terms to show we can see other terms
+            return new TermQuery(new Term(body.name(), "sentence")).createWeight(searcher, scoreMode, boost);
+          }
+        }, BooleanClause.Occur.MUST)
+        .build();
+    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+    assertEquals(1, topDocs.totalHits.value);
+    String[] snippets = highlighter.highlight("body", query, topDocs);
+    assertEquals(1, snippets.length);
+    assertEquals("Test a <b>one</b> <b>sentence</b> document.", snippets[0]);
+
+    ir.close();
+  }
 }
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
index 3910b5a..def4431 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
@@ -65,7 +65,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
         (s) -> false,
         new MatchAllDocsQuery(), new BytesRef[0],
         PhraseHelper.NONE,
-        new CharacterRunAutomaton[0], Collections.emptySet())) {
+        new CharacterRunAutomaton[0], false, Collections.emptySet())) {
       @Override
       public UnifiedHighlighter.OffsetSource getOffsetSource() {
         return offsetSource;
@@ -152,22 +152,18 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
       @Override
       protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
         // THIS IS A COPY of the superclass impl; but use CustomFieldHighlighter
-        Predicate<String> fieldMatcher = getFieldMatcher(field);
-        BytesRef[] terms = filterExtractedTerms(fieldMatcher, allTerms);
-        Set<HighlightFlag> highlightFlags = getFlags(field);
-        PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
-        CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
-        OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
+        UHComponents components = getHighlightComponents(field, query, allTerms);
+        OffsetSource offsetSource = getOptimizedOffsetSource(components);
 
-        UHComponents components = new UHComponents(field, fieldMatcher, query, terms, phraseHelper, automata, highlightFlags);
         // test all is accessible
-        components.getAutomata();
-        components.getPhraseHelper();
-        components.getTerms();
         components.getField();
-        components.getHighlightFlags();
-        components.getQuery();
         components.getFieldMatcher();
+        components.getQuery();
+        components.getTerms();
+        components.getPhraseHelper();
+        components.getAutomata();
+        components.hasUnrecognizedQueryPart();
+        components.getHighlightFlags();
 
         return new CustomFieldHighlighter(field,
             getOffsetStrategy(offsetSource, components),
@@ -179,6 +175,17 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
       }
 
       @Override
+      protected UHComponents getHighlightComponents(String field, Query query, Set<Term> allTerms) {
+        Predicate<String> fieldMatcher = getFieldMatcher(field);
+        BytesRef[] terms = filterExtractedTerms(fieldMatcher, allTerms);
+        Set<HighlightFlag> highlightFlags = getFlags(field);
+        PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
+        CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
+        boolean queryHasUnrecognizedPart = false;
+        return new UHComponents(field, fieldMatcher, query, terms, phraseHelper, automata, queryHasUnrecognizedPart, highlightFlags);
+      }
+
+      @Override
       protected FieldOffsetStrategy getOffsetStrategy(OffsetSource offsetSource, UHComponents components) {
         return super.getOffsetStrategy(offsetSource, components);
       }
diff --git a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
index 2a3e3a7..f03376d 100644
--- a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
+++ b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
@@ -302,4 +302,16 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
         "//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>alpha</em> <em>bravo</em> charlie'");
   }
 
+  // LUCENE-8492
+  public void testSurroundQParser() {
+    assertQ(req("q", "{!surround df=text}2w(second, document)", "hl", "true", "hl.fl", "text"),
+        "count(//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/*)=1");
+  }
+
+  // LUCENE-7757
+  public void testComplexPhraseQParser() {
+    assertQ(req("q", "{!complexphrase df=text}(\"sec* doc*\")", "hl", "true", "hl.fl", "text"),
+        "count(//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/*)=1");
+  }
+
 }