You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by kr...@apache.org on 2017/01/12 16:51:57 UTC
[40/43] lucene-solr:jira/solr-8593: LUCENE-7559: UH: Also expose OffsetsEnum, and test its exposure

LUCENE-7559: UH: Also expose OffsetsEnum, and test its exposure


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7435ab18
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7435ab18
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7435ab18

Branch: refs/heads/jira/solr-8593
Commit: 7435ab18786a43f9004c44713446380711137b79
Parents: 04f45aa
Author: David Smiley <ds...@apache.org>
Authored: Thu Jan 12 00:29:17 2017 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Thu Jan 12 00:29:17 2017 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 +-
 .../search/uhighlight/FieldHighlighter.java     |  4 +-
 .../lucene/search/uhighlight/OffsetsEnum.java   | 45 +++++++++++-----
 .../TestUnifiedHighlighterExtensibility.java    | 57 +++++++++++++++++++-
 4 files changed, 92 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7435ab18/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 109a534..4912920 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -252,8 +252,8 @@ Other
 
 * LUCENE-7534: fix smokeTestRelease.py to run on Cygwin (Mikhail Khludnev)
 
-* LUCENE-7559: UnifiedHighlighter: Make Passage more exposed to allow passage creation to
-  be customized. (David Smiley)
+* LUCENE-7559: UnifiedHighlighter: Make Passage and OffsetsEnum more exposed to allow
+  passage creation to be customized. (David Smiley)
 
 * LUCENE-7599: Simplify TestRandomChains using Java's built-in Predicate and
   Function interfaces. (Ahmet Arslan via Adrien Grand)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7435ab18/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
index 1caa739..cc9f318 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
@@ -138,7 +138,7 @@ public class FieldHighlighter {
 
     PriorityQueue<OffsetsEnum> offsetsEnumQueue = new PriorityQueue<>(offsetsEnums.size() + 1);
     for (OffsetsEnum off : offsetsEnums) {
-      off.weight = scorer.weight(contentLength, off.postingsEnum.freq());
+      off.setWeight(scorer.weight(contentLength, off.freq()));
       off.nextPosition(); // go to first position
       offsetsEnumQueue.add(off);
     }
@@ -214,7 +214,7 @@ public class FieldHighlighter {
           break;
         }
       }
-      passage.setScore(passage.getScore() + off.weight * scorer.tf(tf, passage.getEndOffset() - passage.getStartOffset()));
+      passage.setScore(passage.getScore() + off.getWeight() * scorer.tf(tf, passage.getEndOffset() - passage.getStartOffset()));
     }
 
     Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7435ab18/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
index db1ea1f..708f5c3 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
@@ -18,24 +18,25 @@ package org.apache.lucene.search.uhighlight;
 
 import java.io.Closeable;
 import java.io.IOException;
+import java.util.List;
 import java.util.Objects;
 
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.util.BytesRef;
 
 /**
- * Holds the term &amp; PostingsEnum, and info for tracking the occurrences of a term within the text.
- * It is advanced with the underlying postings and is placed in a priority queue by highlightOffsetsEnums
- * based on the start offset.
+ * Holds the term ({@link BytesRef}), the {@link PostingsEnum}, and offset iteration tracking state.
+ * It is advanced with the underlying postings and is placed in a priority queue by
+ * {@link FieldHighlighter#highlightOffsetsEnums(List)} based on the start offset.
  *
  * @lucene.internal
  */
 public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
   private final BytesRef term;
-  final PostingsEnum postingsEnum; // with offsets
+  private final PostingsEnum postingsEnum; // with offsets
 
-  float weight; // set once in highlightOffsetsEnums
-  private int pos = 0; // the occurrence counter of this term within the text being highlighted.
+  private float weight; // set once in highlightOffsetsEnums
+  private int posCounter = 0; // the occurrence counter of this term within the text being highlighted.
 
   public OffsetsEnum(BytesRef term, PostingsEnum postingsEnum) throws IOException {
     this.term = term; // can be null
@@ -65,29 +66,47 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
     }
   }
 
-  BytesRef getTerm() throws IOException {
+  /** The term at this position; usually the same for every position. This term is a reference that is safe to continue to refer to,
+   * even after we move to the next position. */
+  public BytesRef getTerm() throws IOException {
     // TODO TokenStreamOffsetStrategy could override OffsetsEnum; then remove this hack here
     return term != null ? term : postingsEnum.getPayload(); // abusing payload like this is a total hack!
   }
 
-  boolean hasMorePositions() throws IOException {
-    return pos < postingsEnum.freq();
+  public PostingsEnum getPostingsEnum() {
+    return postingsEnum;
   }
 
-  void nextPosition() throws IOException {
+  public int freq() throws IOException {
+    return postingsEnum.freq();
+  }
+
+  public boolean hasMorePositions() throws IOException {
+    return posCounter < postingsEnum.freq();
+  }
+
+  public void nextPosition() throws IOException {
     assert hasMorePositions();
-    pos++;
+    posCounter++;
     postingsEnum.nextPosition();
   }
 
-  int startOffset() throws IOException {
+  public int startOffset() throws IOException {
     return postingsEnum.startOffset();
   }
 
-  int endOffset() throws IOException {
+  public int endOffset() throws IOException {
     return postingsEnum.endOffset();
   }
 
+  public float getWeight() {
+    return weight;
+  }
+
+  public void setWeight(float weight) {
+    this.weight = weight;
+  }
+
   @Override
   public void close() throws IOException {
     // TODO TokenStreamOffsetStrategy could override OffsetsEnum; then this base impl would be no-op.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7435ab18/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
index 10757a5..08055a2 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
@@ -40,12 +40,16 @@ import org.apache.lucene.search.uhighlight.Passage;
 import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.apache.lucene.search.uhighlight.PassageScorer;
 import org.apache.lucene.search.uhighlight.PhraseHelper;
+import org.apache.lucene.search.uhighlight.SplittingBreakIterator;
 import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.junit.Test;
 
+/**
+ * Helps us be aware of visibility/extensibility concerns.
+ */
 public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
 
   /**
@@ -144,7 +148,19 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
 
       @Override
       protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
-        return super.getFieldHighlighter(field, query, allTerms, maxPassages);
+        // THIS IS A COPY of the superclass impl; but use CustomFieldHighlighter
+        BytesRef[] terms = filterExtractedTerms(getFieldMatcher(field), allTerms);
+        Set<HighlightFlag> highlightFlags = getFlags(field);
+        PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
+        CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
+        OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
+        return new CustomFieldHighlighter(field,
+            getOffsetStrategy(offsetSource, field, terms, phraseHelper, automata, highlightFlags),
+            new SplittingBreakIterator(getBreakIterator(field), UnifiedHighlighter.MULTIVAL_SEP_CHAR),
+            getScorer(field),
+            maxPassages,
+            getMaxNoHighlightPassages(field),
+            getFormatter(field));
       }
 
       @Override
@@ -185,4 +201,43 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
     assertEquals(fieldHighlighter.getField(), fieldName);
   }
 
+  /** Tests maintaining extensibility/visibility of {@link org.apache.lucene.search.uhighlight.FieldHighlighter} out of package. */
+  private class CustomFieldHighlighter extends FieldHighlighter {
+    CustomFieldHighlighter(String field, FieldOffsetStrategy fieldOffsetStrategy, BreakIterator breakIterator, PassageScorer passageScorer, int maxPassages, int maxNoHighlightPassages, PassageFormatter passageFormatter) {
+      super(field, fieldOffsetStrategy, breakIterator, passageScorer, maxPassages, maxNoHighlightPassages, passageFormatter);
+    }
+
+    @Override
+    public Object highlightFieldForDoc(IndexReader reader, int docId, String content) throws IOException {
+      return super.highlightFieldForDoc(reader, docId, content);
+    }
+
+    @Override
+    protected Passage[] highlightOffsetsEnums(List<OffsetsEnum> offsetsEnums) throws IOException {
+      // TEST OffsetsEnums & Passage visibility
+
+      // this code never runs; just for compilation
+      OffsetsEnum oe = new OffsetsEnum(null, EMPTY);
+      oe.getTerm();
+      oe.getPostingsEnum();
+      oe.freq();
+      oe.hasMorePositions();
+      oe.nextPosition();
+      oe.startOffset();
+      oe.endOffset();
+      oe.getWeight();
+      oe.setWeight(2f);
+
+      Passage p = new Passage();
+      p.setStartOffset(0);
+      p.setEndOffset(9);
+      p.setScore(1f);
+      p.addMatch(1, 2, new BytesRef());
+      p.reset();
+      p.sort();
+      //... getters are all exposed; custom PassageFormatter impls use them
+
+      return super.highlightOffsetsEnums(offsetsEnums);
+    }
+  }
 }