You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2008/10/23 17:49:18 UTC

svn commit: r707399 - in /lucene/solr/trunk: ./ example/solr/conf/ src/java/org/apache/solr/common/params/ src/java/org/apache/solr/handler/component/ src/test/org/apache/solr/handler/component/ src/test/test-files/solr/conf/

Author: gsingers
Date: Thu Oct 23 08:49:18 2008
New Revision: 707399

URL: http://svn.apache.org/viewvc?rev=707399&view=rev
Log:
SOLR-651: Added in TermVectorComponent

Added:
    lucene/solr/trunk/src/java/org/apache/solr/common/params/TermVectorParams.java   (with props)
    lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermVectorComponent.java   (with props)
    lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java   (with props)
Modified:
    lucene/solr/trunk/CHANGES.txt
    lucene/solr/trunk/example/solr/conf/schema.xml
    lucene/solr/trunk/example/solr/conf/solrconfig.xml
    lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml

Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=707399&r1=707398&r2=707399&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Thu Oct 23 08:49:18 2008
@@ -70,6 +70,9 @@
 10. SOLR-746: Added "omitHeader" request parameter to omit the header from the response.
     (Noble Paul via shalin)
 
+11. SOLR-651: Added TermVectorComponent for serving up term vector information, plus IDF.
+    See http://wiki.apache.org/solr/TermVectorComponent (gsingers, Vaijanath N. Rao, Noble Paul)
+
 Optimizations
 ----------------------
  1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the

Modified: lucene/solr/trunk/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/solr/conf/schema.xml?rev=707399&r1=707398&r2=707399&view=diff
==============================================================================
--- lucene/solr/trunk/example/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/example/solr/conf/schema.xml Thu Oct 23 08:49:18 2008
@@ -281,6 +281,8 @@
      termVectors: [false] set to true to store the term vector for a given field.
        When using MoreLikeThis, fields used for similarity should be stored for 
        best performance.
+     termPositions: Store position information with the term vector.  This will increase storage costs.
+     termOffsets: Store offset information with the term vector. This will increase storage costs.
    -->
 
    <field name="id" type="string" indexed="true" stored="true" required="true" /> 
@@ -290,7 +292,7 @@
    <field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
    <field name="manu" type="text" indexed="true" stored="true" omitNorms="true"/>
    <field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true" termVectors="true" />
-   <field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
+   <field name="features" type="text" indexed="true" stored="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
    <field name="includes" type="text" indexed="true" stored="true"/>
 
    <field name="weight" type="sfloat" indexed="true" stored="true"/>

Modified: lucene/solr/trunk/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/solr/conf/solrconfig.xml?rev=707399&r1=707398&r2=707399&view=diff
==============================================================================
--- lucene/solr/trunk/example/solr/conf/solrconfig.xml (original)
+++ lucene/solr/trunk/example/solr/conf/solrconfig.xml Thu Oct 23 08:49:18 2008
@@ -574,7 +574,8 @@
     </lst>
   </searchComponent>
 
-  <!-- a request handler utilizing the spellcheck component -->
+  <!-- a request handler utilizing the spellcheck component.  This is purely as an example.
+  You will likely want to add the component to your already specified request handlers. -->
   <requestHandler name="/spellCheckCompRH" class="solr.SearchHandler">
     <lst name="defaults">
       <!-- omp = Only More Popular -->
@@ -589,6 +590,19 @@
     </arr>
   </requestHandler>
 
+  <searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/>
+  <!-- A Req Handler for working with the tvComponent.  This is purely as an example.
+  You will likely want to add the component to your already specified request handlers. -->
+  <requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler">
+    <lst name="defaults">
+      <bool name="tv">true</bool>
+    </lst>
+    <arr name="last-components">
+      <str>tvComponent</str>
+    </arr>
+  </requestHandler>
+
+
   <!-- a search component that enables you to configure the top results for
        a given query regardless of the normal lucene scoring.-->
   <searchComponent name="elevator" class="solr.QueryElevationComponent" >

Added: lucene/solr/trunk/src/java/org/apache/solr/common/params/TermVectorParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/common/params/TermVectorParams.java?rev=707399&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/common/params/TermVectorParams.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/common/params/TermVectorParams.java Thu Oct 23 08:49:18 2008
@@ -0,0 +1,50 @@
+package org.apache.solr.common.params;
+
+
+/**
+ *
+ *
+ **/
+public interface TermVectorParams {
+
+  public static final String TV_PREFIX = "tv.";
+
+    /**
+  * Return Term Frequency info
+  * */
+  public static final String TF =  TV_PREFIX + "tf";
+  /**
+  * Return Term Vector position information
+  *
+  * */
+  public static final String POSITIONS = TV_PREFIX + "positions";
+  /**
+  * Return offset information, if available
+  * */
+  public static final String OFFSETS = TV_PREFIX + "offsets";
+  /**
+  * Return IDF information.  May be expensive
+  * */
+  public static final String IDF = TV_PREFIX + "idf";
+
+  /**
+   * Return TF-IDF calculation, i.e. tf divided by the term's document frequency (the value reported as idf).  May be expensive.
+   */
+  public static final String TF_IDF = TV_PREFIX + "tf-idf";
+
+
+  /**
+   * Return all the options: TF, positions, offsets, idf, tf-idf
+   */
+  public static final String ALL = TV_PREFIX + "all";
+
+  /**
+   * The fields to get term vectors for
+   */
+  public static final String FIELDS = TV_PREFIX + "fl";
+
+  /**
+   * The Doc Ids (Lucene internal ids) of the docs to get the term vectors for
+   */
+  public static final String DOC_IDS = TV_PREFIX + "docIds";
+}

Propchange: lucene/solr/trunk/src/java/org/apache/solr/common/params/TermVectorParams.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermVectorComponent.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermVectorComponent.java?rev=707399&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermVectorComponent.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermVectorComponent.java Thu Oct 23 08:49:18 2008
@@ -0,0 +1,307 @@
+package org.apache.solr.handler.component;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.SetBasedFieldSelector;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermVectorMapper;
+import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.params.TermVectorParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.DocListAndSet;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.RefCounted;
+import org.apache.solr.util.plugin.SolrCoreAware;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Logger;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Return term vectors for the documents in a query result set.
+ * <p/>
+ * Info available:
+ * term, frequency, position, offset, IDF.
+ * <p/>
+ * <b>Note</b> Returning IDF can be expensive.
+ */
+public class TermVectorComponent extends SearchComponent implements SolrCoreAware {
+  private transient static Logger log = Logger.getLogger(TermVectorComponent.class.getName());
+
+  public static final String COMPONENT_NAME = "tv";
+
+  protected NamedList initParams;
+  public static final String TERM_VECTORS = "termVectors";
+
+
+  /**
+   * Adds a {@link #TERM_VECTORS} section to the response when the request
+   * parameter {@link #COMPONENT_NAME} is true; otherwise does nothing.
+   * <p/>
+   * Documents are taken from {@link TermVectorParams#DOC_IDS} when present,
+   * otherwise from the request's result list.  Each document gets a
+   * "doc-&lt;id&gt;" entry holding its unique key and the requested term
+   * vector data; a trailing "uniqueKeyFieldName" entry names the key field.
+   *
+   * @param rb the builder carrying the request, the response and the results
+   * @throws IOException if reading from the index fails
+   */
+  public void process(ResponseBuilder rb) throws IOException {
+    SolrParams params = rb.req.getParams();
+    if (!params.getBool(COMPONENT_NAME, false)) {
+      return;
+    }
+
+    NamedList termVectors = new NamedList();
+    rb.rsp.add(TERM_VECTORS, termVectors);
+    //figure out what options we have, and try to get the appropriate vector
+    //tv.all switches every individual option on
+    boolean all = params.getBool(TermVectorParams.ALL, false);
+    boolean termFreq = all || params.getBool(TermVectorParams.TF, false);
+    boolean positions = all || params.getBool(TermVectorParams.POSITIONS, false);
+    boolean offsets = all || params.getBool(TermVectorParams.OFFSETS, false);
+    boolean idf = all || params.getBool(TermVectorParams.IDF, false);
+    boolean tfIdf = all || params.getBool(TermVectorParams.TF_IDF, false);
+
+    String[] fields = params.getParams(TermVectorParams.FIELDS);
+    if (fields == null) {
+      fields = params.getParams(CommonParams.FL);
+    }
+
+    //explicitly requested doc ids win over the result list
+    List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
+    Iterator<Integer> iter;
+    if (docIds != null && docIds.isEmpty() == false) {
+      iter = docIds.iterator();
+    } else {
+      DocListAndSet listAndSet = rb.getResults();
+      if (listAndSet == null || listAndSet.docList == null) {
+        //no results were produced for this request, so there is nothing to report
+        iter = Collections.<Integer>emptyList().iterator();
+      } else {
+        DocList list = listAndSet.docList;
+        iter = list.iterator();
+      }
+    }
+
+    //Use the searcher bound to this request: the internal doc ids in the result
+    //list are only meaningful against the reader that produced them.  The request
+    //owns this searcher, so it must not be decref'd here.
+    SolrIndexSearcher searcher = rb.req.getSearcher();
+    IndexReader reader = searcher.getReader();
+    TVMapper mapper = new TVMapper(fields, reader, termFreq, positions, offsets, idf, tfIdf);
+    IndexSchema schema = rb.req.getCore().getSchema();
+    String uniqFieldName = schema.getUniqueKeyField().getName();
+    //only load the unique key field of each document
+    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.singleton(uniqFieldName), Collections.emptySet());
+    while (iter.hasNext()) {
+      Integer docId = iter.next();
+      NamedList docNL = new NamedList();
+      termVectors.add("doc-" + docId, docNL);
+      mapper.docNL = docNL;
+      Document document = reader.document(docId, fieldSelector);
+      String uniqId = document.get(uniqFieldName);
+      docNL.add("uniqueKey", uniqId);
+      reader.getTermFreqVector(docId, mapper);
+    }
+    termVectors.add("uniqueKeyFieldName", uniqFieldName);
+  }
+
+  /**
+   * Parses request parameter values into integers.  Each value may hold a
+   * single id or several ids separated by commas (e.g. "1,2,3"), which is the
+   * form used when ids are joined into one parameter value.
+   *
+   * @param vals the raw parameter values, may be null
+   * @return the parsed ids in order, or null if <code>vals</code> is null or empty
+   * @throws SolrException with BAD_REQUEST if any id is missing or not an integer
+   */
+  private List<Integer> getInts(String[] vals) {
+    if (vals == null || vals.length == 0) {
+      return null;
+    }
+    List<Integer> result = new ArrayList<Integer>(vals.length);
+    for (int i = 0; i < vals.length; i++) {
+      if (vals[i] == null) {
+        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "null");
+      }
+      //limit -1 keeps empty tokens so that "1," or ",2" is rejected rather than silently accepted
+      String[] parts = vals[i].split(",", -1);
+      for (int j = 0; j < parts.length; j++) {
+        try {
+          result.add(Integer.valueOf(parts[j].trim()));
+        } catch (NumberFormatException e) {
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e);
+        }
+      }
+    }
+    return result;
+  }
+
+  /**
+   * During the GET_FIELDS stage, queues one shard request per shard that
+   * contributed documents to the merged result.  Each request copies the
+   * original parameters, drops the query and carries the ids of that shard's
+   * documents as a single comma-separated {@link TermVectorParams#DOC_IDS} value.
+   * <p/>
+   * NOTE(review): the ids sent are the ShardDoc ids, while tv.docIds is
+   * documented as Lucene internal ids -- confirm these agree on the shard.
+   *
+   * @param rb the builder for the distributed request
+   * @return always {@link ResponseBuilder#STAGE_DONE}
+   */
+  @Override
+  public int distributedProcess(ResponseBuilder rb) throws IOException {
+    if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) {
+      return ResponseBuilder.STAGE_DONE;
+    }
+
+    //Go ask each shard for its vectors: first bucket the result docs by owning shard
+    HashMap<String, Collection<ShardDoc>> docsByShard = new HashMap<String, Collection<ShardDoc>>();
+    for (ShardDoc sdoc : rb.resultIds.values()) {
+      Collection<ShardDoc> bucket = docsByShard.get(sdoc.shard);
+      if (bucket == null) {
+        bucket = new ArrayList<ShardDoc>();
+        docsByShard.put(sdoc.shard, bucket);
+      }
+      bucket.add(sdoc);
+    }
+
+    //then build one request per shard
+    for (Map.Entry<String, Collection<ShardDoc>> entry : docsByShard.entrySet()) {
+      Collection<ShardDoc> bucket = entry.getValue();
+
+      List<String> ids = new ArrayList<String>(bucket.size());
+      for (ShardDoc shardDoc : bucket) {
+        ids.add(shardDoc.id.toString());
+      }
+
+      ShardRequest sreq = new ShardRequest();
+      sreq.purpose = ShardRequest.PURPOSE_GET_FIELDS;
+      sreq.shards = new String[]{entry.getKey()};
+
+      //start from the original params, minus the query
+      sreq.params = new ModifiableSolrParams();
+      sreq.params.add(rb.req.getParams());
+      sreq.params.remove(CommonParams.Q);
+      sreq.params.add(TermVectorParams.DOC_IDS, StrUtils.join(ids, ','));
+
+      rb.addRequest(this, sreq);
+    }
+    return ResponseBuilder.STAGE_DONE;
+  }
+
+  /**
+   * Lucene {@link TermVectorMapper} that copies the term vector data of one
+   * document into {@link #docNL}.  The caller must point <code>docNL</code> at
+   * the per-document list before handing the mapper to the reader.
+   * <p/>
+   * For every mapped field a NamedList keyed by term is added; each term entry
+   * may contain "freq", "offsets", "positions", "idf" and "tf-idf" depending
+   * on the options passed to the constructor and on what the field stores.
+   */
+  private class TVMapper extends TermVectorMapper {
+    private NamedList docNL;
+    private IndexReader reader;
+    private Set<String> fields;
+    private boolean termFreq, positions, offsets, idf, tfIdf;
+    //internal vars not passed in by construction
+    private boolean map, useOffsets, usePositions;
+    //private Map<String, Integer> idfCache;
+    private NamedList fieldNL;
+    //template term for the field currently being mapped; only set when doc freq lookups are needed
+    private Term currentTerm;
+
+    /**
+     * @param fields    names of the fields to map; null or empty means all fields
+     * @param reader    reader used to look up document frequencies
+     * @param termFreq  include the term frequency
+     * @param positions include positions, when the field stores them
+     * @param offsets   include offsets, when the field stores them
+     * @param idf       include the document frequency of the term as "idf"
+     * @param tfIdf     include term frequency divided by document frequency as "tf-idf"
+     */
+    public TVMapper(String[] fields, IndexReader reader, boolean termFreq, boolean positions, boolean offsets, boolean idf, boolean tfIdf) {
+
+      this.reader = reader;
+      this.fields = fields != null ? new HashSet<String>(Arrays.asList(fields)) : Collections.<String>emptySet();
+      this.termFreq = termFreq;
+      this.positions = positions;
+      this.offsets = offsets;
+      this.idf = idf;
+      this.tfIdf = tfIdf;
+
+    }
+
+    /**
+     * Records one term of the current field, if that field is being mapped.
+     */
+    public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+      if (map == true && fieldNL != null) {
+        NamedList termInfo = new NamedList();
+        fieldNL.add(term, termInfo);
+        if (termFreq == true) {
+          termInfo.add("freq", frequency);
+        }
+        if (useOffsets == true && offsets != null) {
+          NamedList theOffsets = new NamedList();
+          termInfo.add("offsets", theOffsets);
+          for (int i = 0; i < offsets.length; i++) {
+            TermVectorOffsetInfo offset = offsets[i];
+            theOffsets.add("start", offset.getStartOffset());
+            theOffsets.add("end", offset.getEndOffset());
+          }
+        }
+        if (usePositions == true && positions != null) {
+          NamedList positionsNL = new NamedList();
+          for (int i = 0; i < positions.length; i++) {
+            positionsNL.add("position", positions[i]);
+          }
+          termInfo.add("positions", positionsNL);
+        }
+        if (idf == true || tfIdf == true) {
+          //look the document frequency up once and share it between both outputs
+          int docFreq = getIdf(term);
+          if (idf == true) {
+            termInfo.add("idf", docFreq);
+          }
+          if (tfIdf == true) {
+            double tfIdfVal = ((double) frequency) / docFreq;
+            termInfo.add("tf-idf", tfIdfVal);
+          }
+        }
+      }
+    }
+
+    /**
+     * Returns the document frequency of <code>term</code> in the current field,
+     * or 1 when the term cannot be found in the index.
+     */
+    private int getIdf(String term) {
+      int result = 1;
+      if (currentTerm == null || reader == null) {
+        //no field template or reader available, fall back to the default
+        return result;
+      }
+      currentTerm = currentTerm.createTerm(term);
+      try {
+        TermEnum termEnum = reader.terms(currentTerm);
+        if (termEnum != null) {
+          try {
+            //term() is null once the enumeration is exhausted
+            Term found = termEnum.term();
+            if (found != null && found.equals(currentTerm)) {
+              result = termEnum.docFreq();
+            }
+          } finally {
+            termEnum.close();
+          }
+        }
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+      return result;
+    }
+
+    /**
+     * Called by Lucene before the terms of <code>field</code> are mapped;
+     * decides whether the field is wanted and which details can be reported.
+     */
+    public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
+
+      //tf-idf needs the document frequency just like idf does
+      if ((idf == true || tfIdf == true) && reader != null) {
+        this.currentTerm = new Term(field);
+      }
+      useOffsets = storeOffsets && offsets;
+      usePositions = storePositions && positions;
+      if (fields.isEmpty() || fields.contains(field)) {
+        map = true;
+        fieldNL = new NamedList();
+        docNL.add(field, fieldNL);
+      } else {
+        map = false;
+        fieldNL = null;
+      }
+    }
+  }
+
+  public void prepare(ResponseBuilder rb) throws IOException {
+
+  }
+
+  //////////////////////// NamedListInitializedPlugin methods //////////////////////
+  @Override
+  public void init(NamedList args) {
+    super.init(args);
+    this.initParams = args;
+  }
+
+  public void inform(SolrCore core) {
+
+  }
+
+  public String getVersion() {
+    return "$Revision$";
+  }
+
+  public String getSourceId() {
+    return "$Id:$";
+  }
+
+  public String getSource() {
+    return "$Revision:$";
+  }
+
+  public String getDescription() {
+    return "A Component for working with Term Vectors";
+  }
+}

Propchange: lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermVectorComponent.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java?rev=707399&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java (added)
+++ lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java Thu Oct 23 08:49:18 2008
@@ -0,0 +1,211 @@
+package org.apache.solr.handler.component;
+
+import org.apache.solr.util.AbstractSolrTestCase;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.TermVectorParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.request.SolrRequestHandler;
+import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.request.LocalSolrQueryRequest;
+
+import java.util.HashMap;
+import java.util.ArrayList;
+import java.util.Arrays;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ *
+ *
+ **/
+public class TermVectorComponentTest extends AbstractSolrTestCase {
+  @Override
+  public String getSchemaFile() {
+    return "schema.xml";
+  }
+
+  @Override
+  public String getSolrConfigFile() {
+    return "solrconfig.xml";
+  }
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    assertU(adoc("id", "0", "test_posofftv", "This is a title and another title"));
+    assertU(adoc("id", "1", "test_posofftv",
+            "The quick reb fox jumped over the lazy brown dogs."));
+    assertU(adoc("id", "2", "test_posofftv", "This is a document"));
+    assertU(adoc("id", "3", "test_posofftv", "another document"));
+    //bunch of docs that are variants on blue
+    assertU(adoc("id", "4", "test_posofftv", "blue"));
+    assertU(adoc("id", "5", "test_posofftv", "blud"));
+    assertU(adoc("id", "6", "test_posofftv", "boue"));
+    assertU(adoc("id", "7", "test_posofftv", "glue"));
+    assertU(adoc("id", "8", "test_posofftv", "blee"));
+    assertU(adoc("id", "9", "test_posofftv", "blah"));
+
+    assertU("commit", commit());
+  }
+
+  public void testBasics() throws Exception {
+    SolrCore core = h.getCore();
+    SearchComponent tvComp = core.getSearchComponent("tvComponent");
+    assertTrue("tvComp is null and it shouldn't be", tvComp != null);
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.add(CommonParams.Q, "id:0");
+    params.add(CommonParams.QT, "tvrh");
+    params.add(TermVectorParams.TF, "true");
+    params.add(TermVectorComponent.COMPONENT_NAME, "true");
+    SolrRequestHandler handler = core.getRequestHandler("tvrh");
+    SolrQueryResponse rsp;
+    rsp = new SolrQueryResponse();
+    rsp.add("responseHeader", new SimpleOrderedMap());
+    handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+    NamedList values = rsp.getValues();
+    NamedList termVectors = (NamedList) values.get(TermVectorComponent.TERM_VECTORS);
+    assertTrue("termVectors is null and it shouldn't be", termVectors != null);
+    System.out.println("TVs:" + termVectors);
+    NamedList doc = (NamedList) termVectors.getVal(0);
+    assertTrue("doc is null and it shouldn't be", doc != null);
+    assertTrue(doc.size() + " does not equal: " + 2, doc.size() == 2);
+    NamedList field = (NamedList) doc.get("test_posofftv");
+    assertTrue("field is null and it shouldn't be", field != null);
+    assertTrue(field.size() + " does not equal: " + 2, field.size() == 2);
+    NamedList titl = (NamedList) field.get("titl");
+    assertTrue("titl is null and it shouldn't be", titl != null);
+    assertTrue(titl.get("freq") + " does not equal: " + 2, ((Integer) titl.get("freq")) == 2);
+
+    String uniqueKeyFieldName = (String) termVectors.getVal(1);
+    assertTrue("uniqueKeyFieldName is null and it shouldn't be", uniqueKeyFieldName != null);
+    assertTrue(uniqueKeyFieldName + " is not equal to " + "id", uniqueKeyFieldName.equals("id") == true);
+
+  }
+
+  public void testOptions() throws Exception {
+    SolrCore core = h.getCore();
+    SearchComponent tvComp = core.getSearchComponent("tvComponent");
+    assertTrue("tvComp is null and it shouldn't be", tvComp != null);
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.add(CommonParams.Q, "id:0");
+    params.add(CommonParams.QT, "tvrh");
+    params.add(TermVectorParams.TF, "true");
+    params.add(TermVectorParams.IDF, "true");
+    params.add(TermVectorParams.OFFSETS, "true");
+    params.add(TermVectorParams.POSITIONS, "true");
+    params.add(TermVectorParams.TF_IDF, "true");
+    params.add(TermVectorComponent.COMPONENT_NAME, "true");
+
+    SolrRequestHandler handler = core.getRequestHandler("tvrh");
+    SolrQueryResponse rsp;
+    rsp = new SolrQueryResponse();
+    rsp.add("responseHeader", new SimpleOrderedMap());
+    handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+    NamedList values = rsp.getValues();
+    NamedList termVectors = (NamedList) values.get(TermVectorComponent.TERM_VECTORS);
+    assertTrue("termVectors is null and it shouldn't be", termVectors != null);
+    System.out.println("TVs: " + termVectors);
+    NamedList doc = (NamedList) termVectors.getVal(0);
+    assertTrue("doc is null and it shouldn't be", doc != null);
+    assertTrue(doc.size() + " does not equal: " + 2, doc.size() == 2);
+  }
+
+
+  public void testNoFields() throws Exception {
+    SolrCore core = h.getCore();
+    SearchComponent tvComp = core.getSearchComponent("tvComponent");
+    assertTrue("tvComp is null and it shouldn't be", tvComp != null);
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.add(CommonParams.Q, "id:0");
+    params.add(CommonParams.QT, "tvrh");
+    params.add(TermVectorParams.TF, "true");
+    //Pass in a field that doesn't exist on the doc, thus, no vectors should be returned
+    params.add(TermVectorParams.FIELDS, "foo");
+    params.add(TermVectorComponent.COMPONENT_NAME, "true");
+    SolrRequestHandler handler = core.getRequestHandler("tvrh");
+    SolrQueryResponse rsp;
+    rsp = new SolrQueryResponse();
+    rsp.add("responseHeader", new SimpleOrderedMap());
+    handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+    NamedList values = rsp.getValues();
+    NamedList termVectors = (NamedList) values.get(TermVectorComponent.TERM_VECTORS);
+    assertTrue("termVectors is null and it shouldn't be", termVectors != null);
+    NamedList doc = (NamedList) termVectors.getVal(0);
+    assertTrue("doc is null and it shouldn't be", doc != null);
+    assertTrue(doc.size() + " does not equal: " + 1, doc.size() == 1);
+  }
+
+  public void testDistributed() throws Exception {
+    SolrCore core = h.getCore();
+    TermVectorComponent tvComp = (TermVectorComponent) core.getSearchComponent("tvComponent");
+    assertTrue("tvComp is null and it shouldn't be", tvComp != null);
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    ResponseBuilder rb = new ResponseBuilder();
+    rb.stage = ResponseBuilder.STAGE_GET_FIELDS;
+    rb.shards = new String[]{"localhost:0", "localhost:1", "localhost:2", "localhost:3"};//we don't actually call these, since we are going to invoke distributedProcess directly
+    rb.resultIds = new HashMap<Object, ShardDoc>();
+    rb.components = new ArrayList<SearchComponent>();
+    rb.components.add(tvComp);
+    params.add(CommonParams.Q, "id:0");
+    params.add(CommonParams.QT, "tvrh");
+    params.add(TermVectorParams.TF, "true");
+    params.add(TermVectorParams.IDF, "true");
+    params.add(TermVectorParams.OFFSETS, "true");
+    params.add(TermVectorParams.POSITIONS, "true");
+    params.add(TermVectorComponent.COMPONENT_NAME, "true");
+    rb.req = new LocalSolrQueryRequest(core, params);
+    rb.outgoing = new ArrayList<ShardRequest>();
+    //one doc per shard, but make sure there are enough docs to go around
+    for (int i = 0; i < rb.shards.length; i++){
+      ShardDoc doc = new ShardDoc();
+      doc.id = i; //must be a valid doc that was indexed.
+      doc.score = 1 - (i / (float)rb.shards.length);
+      doc.positionInResponse = i;
+      doc.shard = rb.shards[i];
+      doc.orderInShard = 0;
+      rb.resultIds.put(doc.id, doc);
+    }
+
+    int result = tvComp.distributedProcess(rb);
+    assertTrue(result + " does not equal: " + ResponseBuilder.STAGE_DONE, result == ResponseBuilder.STAGE_DONE);
+    //one outgoing per shard
+    assertTrue("rb.outgoing Size: " + rb.outgoing.size() + " is not: " + rb.shards.length, rb.outgoing.size() == rb.shards.length);
+    for (ShardRequest request : rb.outgoing) {
+      ModifiableSolrParams solrParams = request.params;
+      System.out.println("Shard: " + Arrays.asList(request.shards) + " Params: " + solrParams);
+    }
+  }
+
+}
+
+
+
+
+
+/*
+* <field name="test_basictv" type="text" termVectors="true"/>
+   <field name="test_notv" type="text" termVectors="false"/>
+   <field name="test_postv" type="text" termVectors="true" termPositions="true"/>
+   <field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
+   <field name="test_posofftv" type="text" termVectors="true"
+     termPositions="true" termOffsets="true"/>
+*
+* */

Propchange: lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml?rev=707399&r1=707398&r2=707399&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml Thu Oct 23 08:49:18 2008
@@ -369,6 +369,18 @@
     </arr>
   </requestHandler>
 
+  
+  <searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/>
+
+  <requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler">
+    <lst name="defaults">
+
+    </lst>
+    <arr name="last-components">
+      <str>tvComponent</str>
+    </arr>
+  </requestHandler>
+
   <highlighting>
    <!-- Configure the standard fragmenter -->
    <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">