You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/01/02 15:27:14 UTC

svn commit: r1226431 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/contrib/analyzers/ lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/ lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/ lucene...

Author: simonw
Date: Mon Jan  2 14:27:14 2012
New Revision: 1226431

URL: http://svn.apache.org/viewvc?rev=1226431&view=rev
Log:
LUCENE-3665: Make WeightedSpanTermExtractor extensible to handle custom query implementations

Added:
    lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/PositionSpan.java
      - copied unchanged from r1226417, lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/PositionSpan.java
    lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/
      - copied from r1226417, lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/
    lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java
      - copied, changed from r1226417, lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java
Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
    lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java
    lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
    lucene/dev/branches/branch_3x/lucene/contrib/icu/   (props changed)
    lucene/dev/branches/branch_3x/solr/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java?rev=1226431&r1=1226430&r2=1226431&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java Mon Jan  2 14:27:14 2012
@@ -208,8 +208,7 @@ public class QueryScorer implements Scor
   }
   
   private TokenStream initExtractor(TokenStream tokenStream) throws IOException {
-    WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor()
-        : new WeightedSpanTermExtractor(defaultField);
+    WeightedSpanTermExtractor qse = newTermExtractor(defaultField);
     qse.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
     qse.setExpandMultiTermQuery(expandMultiTermQuery);
     qse.setWrapIfNotCachingTokenFilter(wrapToCaching);
@@ -226,6 +225,11 @@ public class QueryScorer implements Scor
     
     return null;
   }
+  
+  protected WeightedSpanTermExtractor newTermExtractor(String defaultField) {
+    return defaultField == null ? new WeightedSpanTermExtractor()
+    : new WeightedSpanTermExtractor(defaultField);
+  }
 
   /*
    * (non-Javadoc)

Modified: lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java?rev=1226431&r1=1226430&r2=1226431&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java Mon Jan  2 14:27:14 2012
@@ -89,16 +89,7 @@ public class WeightedSpanTerm extends We
   public List<PositionSpan> getPositionSpans() {
     return positionSpans;
   }
-}
 
+}
 
-// Utility class to store a Span
-class PositionSpan {
-  int start;
-  int end;
 
-  public PositionSpan(int start, int end) {
-    this.start = start;
-    this.end = end;
-  }
-}

Modified: lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1226431&r1=1226430&r2=1226431&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Mon Jan  2 14:27:14 2012
@@ -89,7 +89,7 @@ public class WeightedSpanTermExtractor {
    *          Map to place created WeightedSpanTerms in
    * @throws IOException
    */
-  private void extract(Query query, Map<String,WeightedSpanTerm> terms) throws IOException {
+  protected void extract(Query query, Map<String,WeightedSpanTerm> terms) throws IOException {
     if (query instanceof BooleanQuery) {
       BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
 
@@ -207,6 +207,12 @@ public class WeightedSpanTermExtractor {
         extractWeightedSpanTerms(terms, sp);
       }
     }
+    extractUnknownQuery(query, terms);
+  }
+
+  protected void extractUnknownQuery(Query query,
+      Map<String, WeightedSpanTerm> terms) throws IOException {
+    // for sub-classing to extract custom queries
   }
 
   /**
@@ -218,7 +224,7 @@ public class WeightedSpanTermExtractor {
    *          SpanQuery to extract Terms from
    * @throws IOException
    */
-  private void extractWeightedSpanTerms(Map<String,WeightedSpanTerm> terms, SpanQuery spanQuery) throws IOException {
+  protected void extractWeightedSpanTerms(Map<String,WeightedSpanTerm> terms, SpanQuery spanQuery) throws IOException {
     Set<String> fieldNames;
 
     if (fieldName == null) {
@@ -300,7 +306,7 @@ public class WeightedSpanTermExtractor {
    *          Query to extract Terms from
    * @throws IOException
    */
-  private void extractWeightedTerms(Map<String,WeightedSpanTerm> terms, Query query) throws IOException {
+  protected void extractWeightedTerms(Map<String,WeightedSpanTerm> terms, Query query) throws IOException {
     Set<Term> nonWeightedTerms = new HashSet<Term>();
     query.extractTerms(nonWeightedTerms);
 
@@ -316,13 +322,13 @@ public class WeightedSpanTermExtractor {
   /**
    * Necessary to implement matches for queries against <code>defaultField</code>
    */
-  private boolean fieldNameComparator(String fieldNameToCheck) {
+  protected boolean fieldNameComparator(String fieldNameToCheck) {
     boolean rv = fieldName == null || fieldNameToCheck == fieldName
         || fieldNameToCheck == defaultField;
     return rv;
   }
 
-  private IndexReader getReaderForField(String field) throws IOException {
+  protected IndexReader getReaderForField(String field) throws IOException {
     if(wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
       tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
       cachedTokenStream = true;
@@ -443,7 +449,7 @@ public class WeightedSpanTermExtractor {
     return terms;
   }
   
-  private void collectSpanQueryFields(SpanQuery spanQuery, Set<String> fieldNames) {
+  protected void collectSpanQueryFields(SpanQuery spanQuery, Set<String> fieldNames) {
     if (spanQuery instanceof FieldMaskingSpanQuery) {
       collectSpanQueryFields(((FieldMaskingSpanQuery)spanQuery).getMaskedQuery(), fieldNames);
     } else if (spanQuery instanceof SpanFirstQuery) {
@@ -463,7 +469,7 @@ public class WeightedSpanTermExtractor {
     }
   }
   
-  private boolean mustRewriteQuery(SpanQuery spanQuery) {
+  protected boolean mustRewriteQuery(SpanQuery spanQuery) {
     if (!expandMultiTermQuery) {
       return false; // Will throw UnsupportedOperationException in case of a SpanRegexQuery.
     } else if (spanQuery instanceof FieldMaskingSpanQuery) {
@@ -498,7 +504,8 @@ public class WeightedSpanTermExtractor {
    * This class makes sure that if both position sensitive and insensitive
    * versions of the same term are added, the position insensitive one wins.
    */
-  static private class PositionCheckingMap<K> extends HashMap<K,WeightedSpanTerm> {
+  @SuppressWarnings("serial")
+  protected static class PositionCheckingMap<K> extends HashMap<K,WeightedSpanTerm> {
 
     @Override
     public void putAll(Map<? extends K,? extends WeightedSpanTerm> m) {

Copied: lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java (from r1226417, lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java?p2=lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java&p1=lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java&r1=1226417&r2=1226431&rev=1226431&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/custom/HighlightCustomQueryTest.java Mon Jan  2 14:27:14 2012
@@ -20,9 +20,10 @@ import java.io.IOException;
 import java.io.StringReader;
 import java.util.Map;
 
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
@@ -91,7 +92,7 @@ public class HighlightCustomQueryTest ex
   private static String highlightField(Query query, String fieldName,
       String text) throws IOException, InvalidTokenOffsetsException {
     TokenStream tokenStream = new MockAnalyzer(random, MockTokenizer.SIMPLE,
-        true, MockTokenFilter.ENGLISH_STOPSET, true).tokenStream(fieldName,
+        true, (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET, true).tokenStream(fieldName,
         new StringReader(text));
     // Assuming "<B>", "</B>" used to highlight
     SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
@@ -139,6 +140,7 @@ public class HighlightCustomQueryTest ex
 
   }
 
+  @SuppressWarnings("serial")
   public static class CustomQuery extends Query {
     private final Term term;