You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2017/01/16 00:10:51 UTC
[33/50] [abbrv] lucene-solr:jira/solr-5944: LUCENE-7559: UH: Also
expose OffsetsEnum, and test its exposure
LUCENE-7559: UH: Also expose OffsetsEnum, and test its exposure
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7435ab18
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7435ab18
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7435ab18
Branch: refs/heads/jira/solr-5944
Commit: 7435ab18786a43f9004c44713446380711137b79
Parents: 04f45aa
Author: David Smiley <ds...@apache.org>
Authored: Thu Jan 12 00:29:17 2017 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Thu Jan 12 00:29:17 2017 -0500
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 +-
.../search/uhighlight/FieldHighlighter.java | 4 +-
.../lucene/search/uhighlight/OffsetsEnum.java | 45 +++++++++++-----
.../TestUnifiedHighlighterExtensibility.java | 57 +++++++++++++++++++-
4 files changed, 92 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7435ab18/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 109a534..4912920 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -252,8 +252,8 @@ Other
* LUCENE-7534: fix smokeTestRelease.py to run on Cygwin (Mikhail Khludnev)
-* LUCENE-7559: UnifiedHighlighter: Make Passage more exposed to allow passage creation to
- be customized. (David Smiley)
+* LUCENE-7559: UnifiedHighlighter: Make Passage and OffsetsEnum more exposed to allow
+ passage creation to be customized. (David Smiley)
* LUCENE-7599: Simplify TestRandomChains using Java's built-in Predicate and
Function interfaces. (Ahmet Arslan via Adrien Grand)
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7435ab18/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
index 1caa739..cc9f318 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/FieldHighlighter.java
@@ -138,7 +138,7 @@ public class FieldHighlighter {
PriorityQueue<OffsetsEnum> offsetsEnumQueue = new PriorityQueue<>(offsetsEnums.size() + 1);
for (OffsetsEnum off : offsetsEnums) {
- off.weight = scorer.weight(contentLength, off.postingsEnum.freq());
+ off.setWeight(scorer.weight(contentLength, off.freq()));
off.nextPosition(); // go to first position
offsetsEnumQueue.add(off);
}
@@ -214,7 +214,7 @@ public class FieldHighlighter {
break;
}
}
- passage.setScore(passage.getScore() + off.weight * scorer.tf(tf, passage.getEndOffset() - passage.getStartOffset()));
+ passage.setScore(passage.getScore() + off.getWeight() * scorer.tf(tf, passage.getEndOffset() - passage.getStartOffset()));
}
Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]);
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7435ab18/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
index db1ea1f..708f5c3 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/OffsetsEnum.java
@@ -18,24 +18,25 @@ package org.apache.lucene.search.uhighlight;
import java.io.Closeable;
import java.io.IOException;
+import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.util.BytesRef;
/**
- * Holds the term & PostingsEnum, and info for tracking the occurrences of a term within the text.
- * It is advanced with the underlying postings and is placed in a priority queue by highlightOffsetsEnums
- * based on the start offset.
+ * Holds the term ({@link BytesRef}), {@link PostingsEnum}, offset iteration tracking.
+ * It is advanced with the underlying postings and is placed in a priority queue by
+ * {@link FieldHighlighter#highlightOffsetsEnums(List)} based on the start offset.
*
* @lucene.internal
*/
public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
private final BytesRef term;
- final PostingsEnum postingsEnum; // with offsets
+ private final PostingsEnum postingsEnum; // with offsets
- float weight; // set once in highlightOffsetsEnums
- private int pos = 0; // the occurrence counter of this term within the text being highlighted.
+ private float weight; // set once in highlightOffsetsEnums
+ private int posCounter = 0; // the occurrence counter of this term within the text being highlighted.
public OffsetsEnum(BytesRef term, PostingsEnum postingsEnum) throws IOException {
this.term = term; // can be null
@@ -65,29 +66,47 @@ public class OffsetsEnum implements Comparable<OffsetsEnum>, Closeable {
}
}
- BytesRef getTerm() throws IOException {
+ /** The term at this position; usually always the same. This term is a reference that is safe to continue to refer to,
+ * even after we move to next position. */
+ public BytesRef getTerm() throws IOException {
// TODO TokenStreamOffsetStrategy could override OffsetsEnum; then remove this hack here
return term != null ? term : postingsEnum.getPayload(); // abusing payload like this is a total hack!
}
- boolean hasMorePositions() throws IOException {
- return pos < postingsEnum.freq();
+ public PostingsEnum getPostingsEnum() {
+ return postingsEnum;
}
- void nextPosition() throws IOException {
+ public int freq() throws IOException {
+ return postingsEnum.freq();
+ }
+
+ public boolean hasMorePositions() throws IOException {
+ return posCounter < postingsEnum.freq();
+ }
+
+ public void nextPosition() throws IOException {
assert hasMorePositions();
- pos++;
+ posCounter++;
postingsEnum.nextPosition();
}
- int startOffset() throws IOException {
+ public int startOffset() throws IOException {
return postingsEnum.startOffset();
}
- int endOffset() throws IOException {
+ public int endOffset() throws IOException {
return postingsEnum.endOffset();
}
+ public float getWeight() {
+ return weight;
+ }
+
+ public void setWeight(float weight) {
+ this.weight = weight;
+ }
+
@Override
public void close() throws IOException {
// TODO TokenStreamOffsetStrategy could override OffsetsEnum; then this base impl would be no-op.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7435ab18/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
index 10757a5..08055a2 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
@@ -40,12 +40,16 @@ import org.apache.lucene.search.uhighlight.Passage;
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.PassageScorer;
import org.apache.lucene.search.uhighlight.PhraseHelper;
+import org.apache.lucene.search.uhighlight.SplittingBreakIterator;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.junit.Test;
+/**
+ * Helps us be aware of visibility/extensibility concerns.
+ */
public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
/**
@@ -144,7 +148,19 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
@Override
protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
- return super.getFieldHighlighter(field, query, allTerms, maxPassages);
+ // THIS IS A COPY of the superclass impl; but use CustomFieldHighlighter
+ BytesRef[] terms = filterExtractedTerms(getFieldMatcher(field), allTerms);
+ Set<HighlightFlag> highlightFlags = getFlags(field);
+ PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
+ CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
+ OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
+ return new CustomFieldHighlighter(field,
+ getOffsetStrategy(offsetSource, field, terms, phraseHelper, automata, highlightFlags),
+ new SplittingBreakIterator(getBreakIterator(field), UnifiedHighlighter.MULTIVAL_SEP_CHAR),
+ getScorer(field),
+ maxPassages,
+ getMaxNoHighlightPassages(field),
+ getFormatter(field));
}
@Override
@@ -185,4 +201,43 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
assertEquals(fieldHighlighter.getField(), fieldName);
}
+ /** Tests maintaining extensibility/visibility of {@link org.apache.lucene.search.uhighlight.FieldHighlighter} out of package. */
+ private class CustomFieldHighlighter extends FieldHighlighter {
+ CustomFieldHighlighter(String field, FieldOffsetStrategy fieldOffsetStrategy, BreakIterator breakIterator, PassageScorer passageScorer, int maxPassages, int maxNoHighlightPassages, PassageFormatter passageFormatter) {
+ super(field, fieldOffsetStrategy, breakIterator, passageScorer, maxPassages, maxNoHighlightPassages, passageFormatter);
+ }
+
+ @Override
+ public Object highlightFieldForDoc(IndexReader reader, int docId, String content) throws IOException {
+ return super.highlightFieldForDoc(reader, docId, content);
+ }
+
+ @Override
+ protected Passage[] highlightOffsetsEnums(List<OffsetsEnum> offsetsEnums) throws IOException {
+ // TEST OffsetsEnums & Passage visibility
+
+ // this code never runs; just for compilation
+ OffsetsEnum oe = new OffsetsEnum(null, EMPTY);
+ oe.getTerm();
+ oe.getPostingsEnum();
+ oe.freq();
+ oe.hasMorePositions();
+ oe.nextPosition();
+ oe.startOffset();
+ oe.endOffset();
+ oe.getWeight();
+ oe.setWeight(2f);
+
+ Passage p = new Passage();
+ p.setStartOffset(0);
+ p.setEndOffset(9);
+ p.setScore(1f);
+ p.addMatch(1, 2, new BytesRef());
+ p.reset();
+ p.sort();
+ //... getters are all exposed; custom PassageFormatter impls uses them
+
+ return super.highlightOffsetsEnums(offsetsEnums);
+ }
+ }
}