You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/01/02 14:49:24 UTC
svn commit: r1226417 - in /lucene/dev/trunk/lucene/contrib/highlighter/src:
java/org/apache/lucene/search/highlight/
test/org/apache/lucene/search/highlight/custom/
Author: simonw
Date: Mon Jan 2 13:49:23 2012
New Revision: 1226417
URL: http://svn.apache.org/viewvc?rev=1226417&view=rev
Log:
LUCENE-3665: Make WeightedSpanTermExtractor extensible to handle custom query implementations
Added:
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/PositionSpan.java
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java
Modified:
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
Added: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/PositionSpan.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/PositionSpan.java?rev=1226417&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/PositionSpan.java (added)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/PositionSpan.java Mon Jan 2 13:49:23 2012
@@ -0,0 +1,31 @@
+package org.apache.lucene.search.highlight;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Utility class to record position spans
+ * @lucene.internal
+ */
+public class PositionSpan {
+ int start;
+ int end;
+
+ public PositionSpan(int start, int end) {
+ this.start = start;
+ this.end = end;
+ }
+}
\ No newline at end of file
Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java?rev=1226417&r1=1226416&r2=1226417&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java Mon Jan 2 13:49:23 2012
@@ -207,8 +207,7 @@ public class QueryScorer implements Scor
}
private TokenStream initExtractor(TokenStream tokenStream) throws IOException {
- WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor()
- : new WeightedSpanTermExtractor(defaultField);
+ WeightedSpanTermExtractor qse = newTermExtractor(defaultField);
qse.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
qse.setExpandMultiTermQuery(expandMultiTermQuery);
qse.setWrapIfNotCachingTokenFilter(wrapToCaching);
@@ -225,6 +224,11 @@ public class QueryScorer implements Scor
return null;
}
+
+ protected WeightedSpanTermExtractor newTermExtractor(String defaultField) {
+ return defaultField == null ? new WeightedSpanTermExtractor()
+ : new WeightedSpanTermExtractor(defaultField);
+ }
/*
* (non-Javadoc)
Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java?rev=1226417&r1=1226416&r2=1226417&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java Mon Jan 2 13:49:23 2012
@@ -89,16 +89,7 @@ public class WeightedSpanTerm extends We
public List<PositionSpan> getPositionSpans() {
return positionSpans;
}
-}
+}
-// Utility class to store a Span
-class PositionSpan {
- int start;
- int end;
- public PositionSpan(int start, int end) {
- this.start = start;
- this.end = end;
- }
-}
Modified: lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1226417&r1=1226416&r2=1226417&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Mon Jan 2 13:49:23 2012
@@ -90,7 +90,7 @@ public class WeightedSpanTermExtractor {
* Map to place created WeightedSpanTerms in
* @throws IOException
*/
- private void extract(Query query, Map<String,WeightedSpanTerm> terms) throws IOException {
+ protected void extract(Query query, Map<String,WeightedSpanTerm> terms) throws IOException {
if (query instanceof BooleanQuery) {
BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
@@ -206,6 +206,12 @@ public class WeightedSpanTermExtractor {
extractWeightedSpanTerms(terms, sp);
}
}
+ extractUnknownQuery(query, terms);
+ }
+
+ protected void extractUnknownQuery(Query query,
+ Map<String, WeightedSpanTerm> terms) throws IOException {
+ // for sub-classing to extract custom queries
}
/**
@@ -217,7 +223,7 @@ public class WeightedSpanTermExtractor {
* SpanQuery to extract Terms from
* @throws IOException
*/
- private void extractWeightedSpanTerms(Map<String,WeightedSpanTerm> terms, SpanQuery spanQuery) throws IOException {
+ protected void extractWeightedSpanTerms(Map<String,WeightedSpanTerm> terms, SpanQuery spanQuery) throws IOException {
Set<String> fieldNames;
if (fieldName == null) {
@@ -305,7 +311,7 @@ public class WeightedSpanTermExtractor {
* Query to extract Terms from
* @throws IOException
*/
- private void extractWeightedTerms(Map<String,WeightedSpanTerm> terms, Query query) throws IOException {
+ protected void extractWeightedTerms(Map<String,WeightedSpanTerm> terms, Query query) throws IOException {
Set<Term> nonWeightedTerms = new HashSet<Term>();
query.extractTerms(nonWeightedTerms);
@@ -321,13 +327,13 @@ public class WeightedSpanTermExtractor {
/**
* Necessary to implement matches for queries against <code>defaultField</code>
*/
- private boolean fieldNameComparator(String fieldNameToCheck) {
+ protected boolean fieldNameComparator(String fieldNameToCheck) {
boolean rv = fieldName == null || fieldName.equals(fieldNameToCheck)
|| (defaultField != null && defaultField.equals(fieldNameToCheck));
return rv;
}
- private AtomicReaderContext getLeafContextForField(String field) throws IOException {
+ protected AtomicReaderContext getLeafContextForField(String field) throws IOException {
if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
cachedTokenStream = true;
@@ -449,7 +455,7 @@ public class WeightedSpanTermExtractor {
return terms;
}
- private void collectSpanQueryFields(SpanQuery spanQuery, Set<String> fieldNames) {
+ protected void collectSpanQueryFields(SpanQuery spanQuery, Set<String> fieldNames) {
if (spanQuery instanceof FieldMaskingSpanQuery) {
collectSpanQueryFields(((FieldMaskingSpanQuery)spanQuery).getMaskedQuery(), fieldNames);
} else if (spanQuery instanceof SpanFirstQuery) {
@@ -469,7 +475,7 @@ public class WeightedSpanTermExtractor {
}
}
- private boolean mustRewriteQuery(SpanQuery spanQuery) {
+ protected boolean mustRewriteQuery(SpanQuery spanQuery) {
if (!expandMultiTermQuery) {
return false; // Will throw UnsupportedOperationException in case of a SpanRegexQuery.
} else if (spanQuery instanceof FieldMaskingSpanQuery) {
@@ -504,7 +510,8 @@ public class WeightedSpanTermExtractor {
* This class makes sure that if both position sensitive and insensitive
* versions of the same term are added, the position insensitive one wins.
*/
- static private class PositionCheckingMap<K> extends HashMap<K,WeightedSpanTerm> {
+ @SuppressWarnings("serial")
+ protected static class PositionCheckingMap<K> extends HashMap<K,WeightedSpanTerm> {
@Override
public void putAll(Map<? extends K,? extends WeightedSpanTerm> m) {
Added: lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java?rev=1226417&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java (added)
+++ lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java Mon Jan 2 13:49:23 2012
@@ -0,0 +1,186 @@
+package org.apache.lucene.search.highlight.custom;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Map;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenFilter;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleFragmenter;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.WeightedSpanTerm;
+import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
+import org.apache.lucene.util.LuceneTestCase;
+
+/**
+ * Tests the extensibility of {@link WeightedSpanTermExtractor} and
+ * {@link QueryScorer} in a user defined package
+ */
+public class HighlightCustomQueryTest extends LuceneTestCase {
+
+ private static final String FIELD_NAME = "contents";
+
+ public void testHighlightCustomQuery() throws IOException,
+ InvalidTokenOffsetsException {
+ String s1 = "I call our world Flatland, not because we call it so,";
+
+ // Verify that a query against the default field results in text being
+ // highlighted
+ // regardless of the field name.
+
+ CustomQuery q = new CustomQuery(new Term(FIELD_NAME, "world"));
+
+ String expected = "I call our <B>world</B> Flatland, not because we call it so,";
+ String observed = highlightField(q, "SOME_FIELD_NAME", s1);
+ if (VERBOSE)
+ System.out.println("Expected: \"" + expected + "\n" + "Observed: \""
+ + observed);
+ assertEquals(
+ "Query in the default field results in text for *ANY* field being highlighted",
+ expected, observed);
+
+ // Verify that a query against a named field does not result in any
+ // highlighting
+ // when the query field name differs from the name of the field being
+ // highlighted,
+ // which in this example happens to be the default field name.
+ q = new CustomQuery(new Term("text", "world"));
+
+ expected = s1;
+ observed = highlightField(q, FIELD_NAME, s1);
+ if (VERBOSE)
+ System.out.println("Expected: \"" + expected + "\n" + "Observed: \""
+ + observed);
+ assertEquals(
+ "Query in a named field does not result in highlighting when that field isn't in the query",
+ s1, highlightField(q, FIELD_NAME, s1));
+
+ }
+
+ /**
+ * This method is intended for use with
+ * <tt>testHighlightCustomQuery()</tt>
+ *
+ * @throws InvalidTokenOffsetsException
+ */
+ private static String highlightField(Query query, String fieldName,
+ String text) throws IOException, InvalidTokenOffsetsException {
+ TokenStream tokenStream = new MockAnalyzer(random, MockTokenizer.SIMPLE,
+ true, MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName,
+ new StringReader(text));
+ // Assuming "<B>", "</B>" used to highlight
+ SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
+ MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME);
+ Highlighter highlighter = new Highlighter(formatter, scorer);
+ highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));
+
+ String rv = highlighter.getBestFragments(tokenStream, text, 1,
+ "(FIELD TEXT TRUNCATED)");
+ return rv.length() == 0 ? text : rv;
+ }
+
+ public static class MyWeightedSpanTermExtractor extends
+ WeightedSpanTermExtractor {
+
+ public MyWeightedSpanTermExtractor() {
+ super();
+ }
+
+ public MyWeightedSpanTermExtractor(String defaultField) {
+ super(defaultField);
+ }
+
+ @Override
+ protected void extractUnknownQuery(Query query,
+ Map<String, WeightedSpanTerm> terms) throws IOException {
+ if (query instanceof CustomQuery) {
+ extractWeightedTerms(terms, new TermQuery(((CustomQuery) query).term));
+ }
+ }
+
+ }
+
+ public static class MyQueryScorer extends QueryScorer {
+
+ public MyQueryScorer(Query query, String field, String defaultField) {
+ super(query, field, defaultField);
+ }
+
+ @Override
+ protected WeightedSpanTermExtractor newTermExtractor(String defaultField) {
+ return defaultField == null ? new MyWeightedSpanTermExtractor()
+ : new MyWeightedSpanTermExtractor(defaultField);
+ }
+
+ }
+
+ public static class CustomQuery extends Query {
+ private final Term term;
+
+ public CustomQuery(Term term) {
+ super();
+ this.term = term;
+ }
+
+ @Override
+ public String toString(String field) {
+ return new TermQuery(term).toString(field);
+ }
+
+ @Override
+ public Query rewrite(IndexReader reader) throws IOException {
+ return new TermQuery(term);
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = super.hashCode();
+ result = prime * result + ((term == null) ? 0 : term.hashCode());
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (!super.equals(obj))
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ CustomQuery other = (CustomQuery) obj;
+ if (term == null) {
+ if (other.term != null)
+ return false;
+ } else if (!term.equals(other.term))
+ return false;
+ return true;
+ }
+
+ }
+}