You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by yo...@apache.org on 2006/07/13 20:22:25 UTC

svn commit: r421678 - in /incubator/solr/trunk: ./ src/java/org/apache/solr/request/ src/java/org/apache/solr/util/ src/test/test-files/solr/conf/

Author: yonik
Date: Thu Jul 13 11:22:24 2006
New Revision: 421678

URL: http://svn.apache.org/viewvc?rev=421678&view=rev
Log:
highlighting: SOLR-24

Modified:
    incubator/solr/trunk/CHANGES.txt
    incubator/solr/trunk/src/java/org/apache/solr/request/DisMaxRequestHandler.java
    incubator/solr/trunk/src/java/org/apache/solr/request/StandardRequestHandler.java
    incubator/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java
    incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml

Modified: incubator/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/CHANGES.txt?rev=421678&r1=421677&r2=421678&view=diff
==============================================================================
--- incubator/solr/trunk/CHANGES.txt (original)
+++ incubator/solr/trunk/CHANGES.txt Thu Jul 13 11:22:24 2006
@@ -18,14 +18,15 @@
 10. copyField accepts dynamicfield-like names as the source.
     (Darren Erik Vengroff via yonik, SOLR-21)
 11. new DocSet.andNot(), DocSet.andNotSize() (yonik)
-12. Ability to store term vectors. (Note: standard request handler does
-    not currently do anything with term vectors) (Mike Klaas via yonik, SOLR-23)
+12. Ability to store term vectors for fields. (Mike Klaas via yonik, SOLR-23)
 13. New abstract BufferedTokenStream for people who want to write
     Tokenizers or TokenFilters that require arbitrary buffering of the
     stream. (SOLR-11 / yonik, hossman)    
 14. New RemoveDuplicatesToken - useful in situations where
     synonyms, stemming, or word-deliminater-ing produce identical tokens at
     the same position. (SOLR-11 / yonik, hossman)
+15. Added highlighting to SolrPluginUtils and implemented in StandardRequestHandler
+    and DisMaxRequestHandler (SOLR-24 / Mike Klaas via hossman,yonik)
 
 Changes in runtime behavior
  1. classes reorganized into different packages, package names changed to Apache

Modified: incubator/solr/trunk/src/java/org/apache/solr/request/DisMaxRequestHandler.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/java/org/apache/solr/request/DisMaxRequestHandler.java?rev=421678&r1=421677&r2=421678&view=diff
==============================================================================
--- incubator/solr/trunk/src/java/org/apache/solr/request/DisMaxRequestHandler.java (original)
+++ incubator/solr/trunk/src/java/org/apache/solr/request/DisMaxRequestHandler.java Thu Jul 13 11:22:24 2006
@@ -41,6 +41,7 @@
 import org.apache.solr.util.StrUtils;
 import org.apache.solr.util.NamedList;
 import org.apache.solr.util.SolrPluginUtils;
+import org.apache.solr.util.DisMaxParams;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -161,7 +162,7 @@
     /* :NOOP */
   }
 
-  protected final U.CommonParams params = new U.CommonParams();
+  protected final DisMaxParams params = new DisMaxParams();
     
   public DisMaxRequestHandler() {
     super();
@@ -218,7 +219,8 @@
     numRequests++;
         
     try {
-            
+
+      int flags = 0;
       SolrIndexSearcher s = req.getSearcher();
       IndexSchema schema = req.getSchema();
             
@@ -267,7 +269,7 @@
 
       if (dis instanceof BooleanQuery) {
         BooleanQuery t = new BooleanQuery();
-        U.flatenBooleanQuery(t, (BooleanQuery)dis);
+        U.flattenBooleanQuery(t, (BooleanQuery)dis);
 
         U.setMinShouldMatch(t, minShouldMatch);
                 
@@ -332,19 +334,19 @@
             
       /* * * Generate Main Results * * */
 
+      flags |= U.setReturnFields(U.getParam(req, params.FL, params.fl), rsp);
       DocList results = s.getDocList(query, restrictions,
                                      SolrPluginUtils.getSort(req),
                                      req.getStart(), req.getLimit(),
-                                     SolrIndexSearcher.GET_SCORES);
+                                     flags);
       rsp.add("search-results",results);
 
-      U.setReturnFields(U.getParam(req, params.FL, params.fl), rsp);
 
             
       /* * * Debugging Info * * */
 
       try {
-        NamedList debug = U.doStandardDebug(req, userQuery, query, results);
+        NamedList debug = U.doStandardDebug(req, userQuery, query, results, params);
         if (null != debug) {
           debug.add("boostquery", boostQuery);
           debug.add("boostfunc", boostFunc);
@@ -362,6 +364,18 @@
         SolrException.logOnce(SolrCore.log,
                               "Exception durring debug", e);
         rsp.add("exception_during_debug", SolrException.toStr(e));
+      }
+
+      /* * * Highlighting/Summarizing  * * */
+      if(U.getBooleanParam(req, params.HIGHLIGHT, params.highlight)) {
+
+        BooleanQuery highlightQuery = new BooleanQuery();
+        U.flattenBooleanQuery(highlightQuery, query);
+        NamedList sumData = U.doStandardHighlighting(results, highlightQuery, 
+                                                     req, params, 
+                                                     queryFields.keySet().toArray(new String[0]));
+        if(sumData != null)
+          rsp.add("highlighting", sumData);
       }
             
     } catch (Exception e) {

Modified: incubator/solr/trunk/src/java/org/apache/solr/request/StandardRequestHandler.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/java/org/apache/solr/request/StandardRequestHandler.java?rev=421678&r1=421677&r2=421678&view=diff
==============================================================================
--- incubator/solr/trunk/src/java/org/apache/solr/request/StandardRequestHandler.java (original)
+++ incubator/solr/trunk/src/java/org/apache/solr/request/StandardRequestHandler.java Thu Jul 13 11:22:24 2006
@@ -29,6 +29,8 @@
 
 import org.apache.solr.util.StrUtils;
 import org.apache.solr.util.NamedList;
+import org.apache.solr.util.SolrPluginUtils;
+import org.apache.solr.util.CommonParams;
 import org.apache.solr.search.*;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.core.SolrCore;
@@ -47,13 +49,17 @@
   long numRequests;
   long numErrors;
 
-
-  public void init(NamedList args) {
-    SolrCore.log.log(Level.INFO, "Unused request handler arguments:" + args);
+  /** shorten the class references for utilities */
+  private static class U extends SolrPluginUtils {
+    /* :NOOP */
   }
+  /** parameters garnered from config file */
+  protected final CommonParams params = new CommonParams();
 
 
-  private final Pattern splitList=Pattern.compile(",| ");
+  public void init(NamedList args) {
+    params.setValues(args);
+  }
 
   public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
     numRequests++;
@@ -63,24 +69,14 @@
     // we need to un-escape them before we pass to QueryParser
     try {
       String sreq = req.getQueryString();
-      String debug = req.getParam("debugQuery");
-      String defaultField = req.getParam("df");
+      String debug = U.getParam(req, params.DEBUG_QUERY, params.debugQuery);
+      String defaultField = U.getParam(req, params.DF, params.df);
 
       // find fieldnames to return (fieldlist)
-      String fl = req.getParam("fl");
-      int flags=0;
+      String fl = U.getParam(req, params.FL, params.fl);
+      int flags = 0; 
       if (fl != null) {
-        // TODO - this could become more efficient if widely used.
-        // TODO - should field order be maintained?
-        String[] flst = splitList.split(fl,0);
-        if (flst.length > 0 && !(flst.length==1 && flst[0].length()==0)) {
-          Set<String> set = new HashSet<String>();
-          for (String fname : flst) {
-            if ("score".equals(fname)) flags |= SolrIndexSearcher.GET_SCORES;
-            set.add(fname);
-          }
-          rsp.setReturnFields(set);
-        }
+        flags |= U.setReturnFields(fl, rsp);
       }
 
       if (sreq==null) throw new SolrException(400,"Missing queryString");
@@ -104,25 +100,20 @@
       DocList results = req.getSearcher().getDocList(query, null, sort, req.getStart(), req.getLimit(), flags);
       rsp.add(null,results);
 
-      if (debug!=null) {
-        NamedList dbg = new NamedList();
-        try {
-          dbg.add("querystring",qs);
-          dbg.add("parsedquery",QueryParsing.toString(query,req.getSchema()));
-          dbg.add("explain", getExplainList(query, results, req.getSearcher(), req.getSchema()));
-          String otherQueryS = req.getParam("explainOther");
-          if (otherQueryS != null && otherQueryS.length() > 0) {
-            DocList otherResults = doQuery(otherQueryS,req.getSearcher(), req.getSchema(),0,10);
-            dbg.add("otherQuery",otherQueryS);
-            dbg.add("explainOther", getExplainList(query, otherResults, req.getSearcher(), req.getSchema()));
-          }
-        } catch (Exception e) {
-          SolrException.logOnce(SolrCore.log,"Exception during debug:",e);
-          dbg.add("exception_during_debug", SolrException.toStr(e));
-        }
-        rsp.add("debug",dbg);
+      try {
+        NamedList dbg = U.doStandardDebug(req, qs, query, results, params);
+        if (null != dbg) 
+          rsp.add("debug", dbg);
+      } catch (Exception e) {
+        SolrException.logOnce(SolrCore.log, "Exception durring debug", e);
+        rsp.add("exception_during_debug", SolrException.toStr(e));
       }
 
+      NamedList sumData = SolrPluginUtils.doStandardHighlighting(
+        results, query, req, params, new String[]{defaultField});
+      if(sumData != null)
+        rsp.add("highlighting", sumData);
+
     } catch (SolrException e) {
       rsp.setException(e);
       numErrors++;
@@ -134,52 +125,6 @@
       return;
     }
   }
-
-  private NamedList getExplainList(Query query, DocList results, SolrIndexSearcher searcher, IndexSchema schema) throws IOException {
-    NamedList explainList = new NamedList();
-    DocIterator iterator = results.iterator();
-    for (int i=0; i<results.size(); i++) {
-      int id = iterator.nextDoc();
-
-      Explanation explain = searcher.explain(query, id);
-      //explainList.add(Integer.toString(id), explain.toString().split("\n"));
-
-      Document doc = searcher.doc(id);
-      String strid = schema.printableUniqueKey(doc);
-      String docname = "";
-      if (strid != null) docname="id="+strid+",";
-      docname = docname + "internal_docid="+id;
-
-      explainList.add(docname, "\n" +explain.toString());
-    }
-    return explainList;
-  }
-
-
-  private DocList doQuery(String sreq, SolrIndexSearcher searcher, IndexSchema schema, int start, int limit) throws IOException {
-    List<String> commands = StrUtils.splitSmart(sreq,';');
-
-    String qs = commands.size() >= 1 ? commands.get(0) : "";
-    Query query = QueryParsing.parseQuery(qs, schema);
-
-    // If the first non-query, non-filter command is a simple sort on an indexed field, then
-    // we can use the Lucene sort ability.
-    Sort sort = null;
-    if (commands.size() >= 2) {
-      QueryParsing.SortSpec sortSpec = QueryParsing.parseSort(commands.get(1), schema);
-      if (sortSpec != null) {
-        sort = sortSpec.getSort();
-        if (sortSpec.getCount() >= 0) {
-          limit = sortSpec.getCount();
-        }
-      }
-    }
-
-    DocList results = searcher.getDocList(query,(DocSet)null, sort, start, limit);
-    return results;
-  }
-
-
 
   //////////////////////// SolrInfoMBeans methods //////////////////////
 

Modified: incubator/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java?rev=421678&r1=421677&r2=421678&view=diff
==============================================================================
--- incubator/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java (original)
+++ incubator/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java Thu Jul 13 11:22:24 2006
@@ -16,6 +16,7 @@
 
 package org.apache.solr.util;
 
+import org.apache.solr.core.Config; // highlighting
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrInfoMBean;
 import org.apache.solr.core.SolrException;
@@ -40,6 +41,7 @@
 
 import org.apache.solr.util.StrUtils;
 import org.apache.solr.util.NamedList;
+import org.apache.solr.util.XML;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -53,9 +55,22 @@
 import org.apache.lucene.search.ConstantScoreRangeQuery;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.highlight.Highlighter; // highlighting
+import org.apache.lucene.search.highlight.TokenSources;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.Formatter;
+import org.apache.lucene.search.highlight.SimpleFragmenter;
+import org.apache.lucene.search.highlight.TextFragment;
+import org.apache.lucene.search.highlight.NullFragmenter;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.Token;
+
 
 import org.xmlpull.v1.XmlPullParserException;
 
@@ -73,6 +88,8 @@
 import java.util.HashMap;
 import java.util.regex.Pattern;
 import java.io.IOException;
+import java.io.StringReader;
+import java.io.StringWriter; // highlighting
 import java.net.URL;
     
 /**
@@ -84,6 +101,9 @@
  * </p>
  *
  * <p>:TODO: refactor StandardRequestHandler to use these utilities</p>
+ *
+ * <p>:TODO: Many "standard" functionality methods are not cognisant of
+ * default parameter settings.</p>
  */
 public class SolrPluginUtils {
     
@@ -108,6 +128,8 @@
                                 String param, String def) {
         
     String v = req.getParam(param);
+    // Note: parameters passed but given only white-space value are
+    // considered equivalent to passing nothing for that parameter.
     if (null == v || "".equals(v.trim())) {
       return def;
     }
@@ -134,7 +156,18 @@
     return r;
   }
         
-
+  /**
+   * Treats parameter value as a boolean.  The string 'false' is false; 
+   * any other non-empty string is true.
+   */
+  public static boolean getBooleanParam(SolrQueryRequest req,
+                                       String param, boolean def) {        
+    String v = req.getParam(param);
+    if (null == v || "".equals(v.trim())) {
+      return def;
+    }
+    return !"false".equals(v.trim());
+  }
     
   private final static Pattern splitList=Pattern.compile(",| ");
 
@@ -142,29 +175,36 @@
    * Assumes the standard query param of "fl" to specify the return fields
    * @see #setReturnFields(String,SolrQueryResponse)
    */
-  public static void setReturnFields(SolrQueryRequest req,
-                                     SolrQueryResponse res) {
+  public static int setReturnFields(SolrQueryRequest req,
+                                    SolrQueryResponse res) {
 
-    setReturnFields(req.getParam(FL), res);
+    return setReturnFields(req.getParam(FL), res);
   }
 
   /**
    * Given a space seperated list of field names, sets the field list on the
    * SolrQueryResponse.
+   *
+   * @return bitfield of SolrIndexSearcher flags that need to be set
    */
-  public static void setReturnFields(String fl,
-                                     SolrQueryResponse res) {
-
+  public static int setReturnFields(String fl,
+                                    SolrQueryResponse res) {
+    int flags = 0;
     if (fl != null) {
       // TODO - this could become more efficient if widely used.
       // TODO - should field order be maintained?
       String[] flst = splitList.split(fl.trim(),0);
       if (flst.length > 0 && !(flst.length==1 && flst[0].length()==0)) {
         Set<String> set = new HashSet<String>();
-        for (String fname : flst) set.add(fname);
+        for (String fname : flst) {
+          if("score".equalsIgnoreCase(fname))
+            flags |= SolrIndexSearcher.GET_SCORES;
+          set.add(fname);
+        }
         res.setReturnFields(set);
       }
     }
+    return flags;
   }
 
   /**
@@ -201,24 +241,24 @@
    * @param query the query built from the userQuery
    *              (and perhaps other clauses) that identifies the main
    *              result set of the response.
-   * @param results the main result set of hte response
+   * @param results the main result set of the response
    */
   public static NamedList doStandardDebug(SolrQueryRequest req,
                                           String userQuery,
                                           Query query,
-                                          DocList results)
+                                          DocList results,
+                                          CommonParams params)
     throws IOException {
         
-        
-    String debug = req.getParam("debugQuery");
+    String debug = getParam(req, params.DEBUG_QUERY, params.debugQuery);
 
     NamedList dbg = null;
     if (debug!=null) {
       dbg = new NamedList();          
 
       /* userQuery may have been pre-processes .. expose that */
-      dbg.add("rawquerystring",req.getQueryString());
-      dbg.add("querystring",userQuery);
+      dbg.add("rawquerystring", req.getQueryString());
+      dbg.add("querystring", userQuery);
 
       /* QueryParsing.toString isn't perfect, use it to see converted
        * values, use regular toString to see any attributes of the
@@ -275,6 +315,177 @@
   }
 
   /**
+   * Retrieve a default Highlighter instance for a given query.
+   *
+   * @param query Query instance
+   */
+  public static Highlighter getDefaultHighlighter(Query query) {
+    Highlighter highlighter = new Highlighter(
+      new SimpleHTMLFormatter("<em>", "</em>"), 
+      new QueryScorer(query));
+    highlighter.setTextFragmenter(new GapFragmenter());
+    return highlighter;
+  }
+
+  /**
+   * Generates a list of Highlighted query fragments for each item in a list
+   * of documents.  Convenience method that constructs a Highlighter from a
+   * Query.
+   *
+   * @param docs query results
+   * @param fieldNames list of fields to summarize
+   * @param query resulting query object
+   * @param searcher the SolrIndexSearcher corresponding to a request
+   * @param numFragments maximum number of summary fragments to return for
+   *        a given field   
+   */
+  public static NamedList getHighlights(DocList docs, 
+                                        String[] fieldNames, 
+                                        Query query,
+                                        SolrIndexSearcher searcher,
+                                        int numFragments
+                                        ) throws IOException {  
+    
+    return getHighlights(docs, fieldNames, searcher, 
+                         getDefaultHighlighter(query), numFragments);
+  }
+
+  /**
+   * Generates a list of Highlighted query fragments for each item in a list
+   * of documents
+   *
+   * @param docs query results
+   * @param fieldNames list of fields to summarize
+   * @param searcher the SolrIndexSearcher corresponding to a request
+   * @param numFragments maximum number of summary fragments to return for
+   *        a given field   
+   * @param highlighter a customized Highlighter instance
+   *
+   * @return NamedList containing a NamedList for each document, which in
+   * turn contains (field, summary) pairs.
+   */
+  public static NamedList getHighlights(DocList docs, 
+                                        String[] fieldNames, 
+                                        SolrIndexSearcher searcher,
+                                        Highlighter highlighter,
+                                        int numFragments
+                                        ) throws IOException {
+    NamedList fragments = new NamedList();
+    DocIterator iterator = docs.iterator();
+    for (int i=0; i<docs.size(); i++) {
+      int docId = iterator.nextDoc();
+      // use the Searcher's doc cache
+      Document doc = searcher.doc(docId);
+      NamedList docSummaries = new NamedList();
+      for(String fieldName : fieldNames) {
+        fieldName = fieldName.trim();
+        String[] docTexts = doc.getValues(fieldName);
+        if(docTexts == null) 
+          continue;        
+        String[] summaries;
+        TextFragment[] frag;
+        if(docTexts.length == 1) {
+          // single-valued field
+          TokenStream tstream;
+          try {
+            // attempt term vectors
+            tstream = TokenSources.getTokenStream(
+              searcher.getReader(), docId, fieldName);
+          } catch (IllegalArgumentException e) {
+            // fall back to analyzer
+            tstream = searcher.getSchema().getAnalyzer().tokenStream(
+             fieldName, new StringReader(docTexts[0]));
+          }
+          frag = highlighter.getBestTextFragments(
+            tstream, docTexts[0], false, numFragments);
+
+        } else {
+          // multi-valued field
+          MultiValueTokenStream tstream;
+          tstream = new MultiValueTokenStream(fieldName,
+                                              docTexts,
+                                              searcher.getSchema().getAnalyzer());
+          frag = highlighter.getBestTextFragments(
+            tstream, tstream.asSingleValue(), false, numFragments);
+        }
+        // convert fragments back into text
+        // TODO: we can include score and position information in output as
+        // snippet attributes
+        if(frag.length > 0) {
+          ArrayList fragTexts = new ArrayList();
+          for (int j = 0; j < frag.length; j++) {
+              if ((frag[j] != null) && (frag[j].getScore() > 0)) {
+                  fragTexts.add(frag[j].toString());
+                }
+            }
+          summaries =  (String[]) fragTexts.toArray(new String[0]);
+          if(summaries.length > 0)
+            docSummaries.add(fieldName, summaries);
+        }
+      }      
+      String printId = searcher.getSchema().printableUniqueKey(doc);
+      fragments.add(printId == null ? null : printId, docSummaries);
+    }    
+    return fragments;
+  }
+
+  /**
+   * Perform highlighting of selected fields.
+   *
+   * @param docs query results
+   * @param query the (possibly re-written) query
+   * @param req associated SolrQueryRequest
+   * @param defaultFields default search field list
+   *
+   * @return NamedList containing summary data, or null if highlighting is 
+   * disabled.
+   *
+   */
+  public static NamedList doStandardHighlighting(DocList docs,
+                                                 Query query,
+                                                 SolrQueryRequest req,
+                                                 CommonParams params,
+                                                 String[] defaultFields
+                                                 ) throws IOException {
+    if(!getBooleanParam(req, params.HIGHLIGHT, params.highlight)) 
+      return null;
+    String fieldParam = getParam(req, params.HIGHLIGHT_FIELDS, 
+                                 params.highlightFields);
+    String fields[];
+    if(fieldParam == null || fieldParam.trim().equals("")) {
+      // use default search field if highlight fieldlist not specified.
+      if (defaultFields == null || defaultFields.length == 0 ||
+          defaultFields[0] == null) {
+        fields = new String[]{req.getSchema().getDefaultSearchFieldName()};
+      } else
+        fields = defaultFields;
+    } else 
+      fields = splitList.split(fieldParam.trim());
+
+    Highlighter highlighter;
+    String formatterSpec = getParam(req, params.HIGHLIGHT_FORMATTER_CLASS,
+                                    params.highlightFormatterClass);
+    if(formatterSpec == null || formatterSpec.equals("")) {
+      highlighter = getDefaultHighlighter(query);
+    } else {
+      highlighter = new Highlighter(
+        (Formatter)Config.newInstance(formatterSpec),
+        new QueryScorer(query));
+      highlighter.setTextFragmenter(new GapFragmenter());
+    }
+    
+    int numFragments = getNumberParam(req, params.MAX_SNIPPETS,
+                                      params.maxSnippets).intValue();
+
+    return getHighlights(
+      docs, 
+      fields, 
+      req.getSearcher(),
+      highlighter,
+      numFragments);
+  }
+
+  /**
    * Executes a basic query in lucene syntax
    */
   public static DocList doSimpleQuery(String sreq,
@@ -455,7 +666,7 @@
    * so do not attempt to reuse it.
    * </p>
    */
-  public static void flatenBooleanQuery(BooleanQuery to, BooleanQuery from) {
+  public static void flattenBooleanQuery(BooleanQuery to, BooleanQuery from) {
 
     BooleanClause[] c = from.getClauses();
     for (int i = 0; i < c.length; i++) {
@@ -468,7 +679,7 @@
           && !c[i].isProhibited()) {
                 
         /* we can recurse */
-        flatenBooleanQuery(to, (BooleanQuery)ci);
+        flattenBooleanQuery(to, (BooleanQuery)ci);
                 
       } else {
         to.add(c[i]);
@@ -512,169 +723,6 @@
     return s.toString().replace("\"","");
   }
 
-     
-
-  /**
-   * A collection on common params, both for Plugin initialization and
-   * for Requests.
-   */
-  public static class CommonParams {
-
-    /** query and init param for tiebreaker value */
-    public static String TIE = "tie";
-    /** query and init param for query fields */
-    public static String QF = "qf";
-    /** query and init param for phrase boost fields */
-    public static String PF = "pf";
-    /** query and init param for MinShouldMatch specification */
-    public static String MM = "mm";
-    /** query and init param for Phrase Slop value */
-    public static String PS = "ps";
-    /** query and init param for boosting query */
-    public static String BQ = "bq";
-    /** query and init param for boosting functions */
-    public static String BF = "bf";
-    /** query and init param for filtering query */
-    public static String FQ = "fq";
-    /** query and init param for field list */
-    public static String FL = "fl";
-    /** query and init param for field list */
-    public static String GEN = "gen";
-        
-    /** the default tie breaker to use in DisjunctionMaxQueries */
-    public float tiebreaker = 0.0f;
-    /** the default query fields to be used */
-    public String qf = null;
-    /** the default phrase boosting fields to be used */
-    public String pf = null;
-    /** the default min should match to be used */
-    public String mm = "100%";
-    /** the default phrase slop to be used */
-    public int pslop = 0;
-    /** the default boosting query to be used */
-    public String bq = null;
-    /** the default boosting functions to be used */
-    public String bf = null;
-    /** the default filtering query to be used */
-    public String fq = null;
-    /** the default field list to be used */
-    public String fl = null;
-
-    public CommonParams() {
-      /* :NOOP: */
-    }
-
-    /** @see #setValues */
-    public CommonParams(NamedList args) {
-      this();
-      setValues(args);
-    }
-
-    /**
-     * Sets the params using values from a NamedList, usefull in the
-     * init method for your handler.
-     *
-     * <p>
-     * If any param is not of the expected type, a severe error is
-     * logged,and the param is skipped.
-     * </p>
-     *
-     * <p>
-     * If any param is not of in the NamedList, it is skipped and the
-     * old value is left alone.
-     * </p>
-     *
-     */
-    public void setValues(NamedList args) {
-
-      Object tmp;
-
-      tmp = args.get(TIE);
-      if (null != tmp) {
-        if (tmp instanceof Float) {
-          tiebreaker = ((Float)tmp).floatValue();
-        } else {
-          SolrCore.log.severe("init param is not a float: " + TIE);
-        }
-      }
-
-      tmp = args.get(QF);
-      if (null != tmp) {
-        if (tmp instanceof String) {
-          qf = tmp.toString();
-        } else {
-          SolrCore.log.severe("init param is not a str: " + QF);
-        }
-      }
-
-      tmp = args.get(PF);
-      if (null != tmp) {
-        if (tmp instanceof String) {
-          pf = tmp.toString();
-        } else {
-          SolrCore.log.severe("init param is not a str: " + PF);
-        }
-      }
-
-        
-      tmp = args.get(MM);
-      if (null != tmp) {
-        if (tmp instanceof String) {
-          mm = tmp.toString();
-        } else {
-          SolrCore.log.severe("init param is not a str: " + MM);
-        }
-      }
-        
-      tmp = args.get(PS);
-      if (null != tmp) {
-        if (tmp instanceof Integer) {
-          pslop = ((Integer)tmp).intValue();
-        } else {
-          SolrCore.log.severe("init param is not an int: " + PS);
-        }
-      }
-
-      tmp = args.get(BQ);
-      if (null != tmp) {
-        if (tmp instanceof String) {
-          bq = tmp.toString();
-        } else {
-          SolrCore.log.severe("init param is not a str: " + BQ);
-        }
-      }
- 
-      tmp = args.get(BF);
-      if (null != tmp) {
-        if (tmp instanceof String) {
-          bf = tmp.toString();
-        } else {
-          SolrCore.log.severe("init param is not a str: " + BF);
-        }
-      }
- 
-      tmp = args.get(FQ);
-      if (null != tmp) {
-        if (tmp instanceof String) {
-          fq = tmp.toString();
-        } else {
-          SolrCore.log.severe("init param is not a str: " + FQ);
-        }
-      }
-        
-      tmp = args.get(FL);
-      if (null != tmp) {
-        if (tmp instanceof String) {
-          fl = tmp.toString();
-        } else {
-          SolrCore.log.severe("init param is not a str: " + FL);
-        }
-      }
-        
-    }
-
-  }
-
   /**
    * A subclass of SolrQueryParser that supports aliasing fields for
    * constructing DisjunctionMaxQueries.
@@ -763,8 +811,6 @@
         
   }
 
-
-    
   /**
    * Determines the correct Sort based on the request parameter "sort"
    *
@@ -818,6 +864,105 @@
     }
             
   }
+}
+
+/** 
+ * Helper class which creates a single TokenStream out of values from a 
+ * multi-valued field.
+ */
+class MultiValueTokenStream extends TokenStream {
+  private String fieldName;
+  private String[] values;
+  private Analyzer analyzer;
+  private int curIndex;                  // index of the value currently being tokenized
+  private int curOffset;                 // offset into concatenated string
+  private TokenStream currentStream;     // tokenStream currently being iterated
+
+  /** Constructs a TokenStream for consecutively-analyzed field values
+   *
+   * @param fieldName name of the field
+   * @param values array of field data
+   * @param analyzer analyzer instance
+   */
+  public MultiValueTokenStream(String fieldName, String[] values, 
+                               Analyzer analyzer) {
+    this.fieldName = fieldName;
+    this.values = values;
+    this.analyzer = analyzer;
+    curIndex = -1;
+    curOffset = 0;
+    currentStream = null;
     
-    
+  }
+
+  /** Returns the next token in the stream, or null at EOS. */
+  public Token next() throws IOException {
+    int extra = 0;
+    if(currentStream == null) {
+      curIndex++;        
+      if(curIndex < values.length) {
+        currentStream = analyzer.tokenStream(fieldName, 
+                                             new StringReader(values[curIndex]));
+        // add extra space between multiple values
+        if(curIndex > 0) 
+          extra = analyzer.getPositionIncrementGap(fieldName);
+      } else {
+        return null;
+      }
+    }
+    Token nextToken = currentStream.next();
+    if(nextToken == null) {
+      curOffset += values[curIndex].length();
+      currentStream = null;
+      return next();
+    }
+    // create a modified token whose offsets index into the concatenated
+    // string of all values
+    Token offsetToken = new Token(nextToken.termText(), 
+                                  nextToken.startOffset() + curOffset,
+                                  nextToken.endOffset() + curOffset);
+    offsetToken.setPositionIncrement(nextToken.getPositionIncrement() + extra*10);
+    return offsetToken;
+  }
+
+  /**
+   * Returns all values as a single String into which the Tokens index with
+   * their offsets.
+   */
+  public String asSingleValue() {
+    StringBuilder sb = new StringBuilder();
+    for(String str : values)
+      sb.append(str);
+    return sb.toString();
+  }
+
+}
+
+/**
+ * A simple modification of SimpleFragmenter which additionally creates new
+ * fragments when an unusually-large position increment is encountered
+ * (this behaves much better in the presence of multi-valued fields).
+ */
+class GapFragmenter extends SimpleFragmenter {
+  public static final int INCREMENT_THRESHOLD = 50;
+  protected int fragOffsetAccum = 0;
+  /* (non-Javadoc)
+   * @see org.apache.lucene.search.highlight.TextFragmenter#start(java.lang.String)
+   */
+  public void start(String originalText) {
+    fragOffsetAccum = 0;
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token)
+   */
+  public boolean isNewFragment(Token token) {
+    boolean isNewFrag = 
+      token.endOffset() >= fragOffsetAccum + getFragmentSize() ||
+      token.getPositionIncrement() > INCREMENT_THRESHOLD;
+    if(isNewFrag) {
+        fragOffsetAccum += token.endOffset() - fragOffsetAccum;
+    }
+    return isNewFrag;
+  }
 }

Modified: incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml?rev=421678&r1=421677&r2=421678&view=diff
==============================================================================
--- incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml (original)
+++ incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml Thu Jul 13 11:22:24 2006
@@ -339,6 +339,8 @@
    <dynamicField name="*_sI" type="string"  indexed="true"  stored="false"/>
    <dynamicField name="*_sS" type="string"  indexed="false" stored="true"/>
    <dynamicField name="t_*"  type="text"    indexed="true"  stored="true"/>
+   <dynamicField name="tv_*"  type="text" indexed="true"  stored="true" 
+      termVectors="true" termPositions="true" termOffsets="true"/>
    
 
    <!-- for testing to ensure that longer patterns are matched first -->



Re: svn commit: r421678 - in /incubator/solr/trunk: ./ src/java/org/apache/solr/request/ src/java/org/apache/solr/util/ src/test/test-files/solr/conf/

Posted by Chris Hostetter <ho...@fucit.org>.
: Modified:
:     incubator/solr/trunk/CHANGES.txt
:     incubator/solr/trunk/src/java/org/apache/solr/request/DisMaxRequestHandler.java
:     incubator/solr/trunk/src/java/org/apache/solr/request/StandardRequestHandler.java
:     incubator/solr/trunk/src/java/org/apache/solr/util/SolrPluginUtils.java
:     incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml

Yonik: I think you forgot to svn add the new files from the patch.

I'm getting "ant clean test" compilation failures because it can't find
"CommonParams" and "DisMaxParams"



-Hoss