You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2008/10/23 17:49:18 UTC
svn commit: r707399 - in /lucene/solr/trunk: ./ example/solr/conf/
src/java/org/apache/solr/common/params/
src/java/org/apache/solr/handler/component/
src/test/org/apache/solr/handler/component/ src/test/test-files/solr/conf/
Author: gsingers
Date: Thu Oct 23 08:49:18 2008
New Revision: 707399
URL: http://svn.apache.org/viewvc?rev=707399&view=rev
Log:
SOLR-651: Added in TermVectorComponent
Added:
lucene/solr/trunk/src/java/org/apache/solr/common/params/TermVectorParams.java (with props)
lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermVectorComponent.java (with props)
lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java (with props)
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/example/solr/conf/schema.xml
lucene/solr/trunk/example/solr/conf/solrconfig.xml
lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml
Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=707399&r1=707398&r2=707399&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Thu Oct 23 08:49:18 2008
@@ -70,6 +70,9 @@
10. SOLR-746: Added "omitHeader" request parameter to omit the header from the response.
(Noble Paul via shalin)
+11. SOLR-651: Added TermVectorComponent for serving up term vector information, plus IDF.
+ See http://wiki.apache.org/solr/TermVectorComponent (gsingers, Vaijanath N. Rao, Noble Paul)
+
Optimizations
----------------------
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the
Modified: lucene/solr/trunk/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/solr/conf/schema.xml?rev=707399&r1=707398&r2=707399&view=diff
==============================================================================
--- lucene/solr/trunk/example/solr/conf/schema.xml (original)
+++ lucene/solr/trunk/example/solr/conf/schema.xml Thu Oct 23 08:49:18 2008
@@ -281,6 +281,8 @@
termVectors: [false] set to true to store the term vector for a given field.
When using MoreLikeThis, fields used for similarity should be stored for
best performance.
+ termPositions: Store position information with the term vector. This will increase storage costs.
+ termOffsets: Store offset information with the term vector. This will increase storage costs.
-->
<field name="id" type="string" indexed="true" stored="true" required="true" />
@@ -290,7 +292,7 @@
<field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
<field name="manu" type="text" indexed="true" stored="true" omitNorms="true"/>
<field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true" omitNorms="true" termVectors="true" />
- <field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
+ <field name="features" type="text" indexed="true" stored="true" multiValued="true" termVectors="true" termPositions="true" termOffsets="true"/>
<field name="includes" type="text" indexed="true" stored="true"/>
<field name="weight" type="sfloat" indexed="true" stored="true"/>
Modified: lucene/solr/trunk/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/example/solr/conf/solrconfig.xml?rev=707399&r1=707398&r2=707399&view=diff
==============================================================================
--- lucene/solr/trunk/example/solr/conf/solrconfig.xml (original)
+++ lucene/solr/trunk/example/solr/conf/solrconfig.xml Thu Oct 23 08:49:18 2008
@@ -574,7 +574,8 @@
</lst>
</searchComponent>
- <!-- a request handler utilizing the spellcheck component -->
+ <!-- a request handler utilizing the spellcheck component. This is purely an example.
+ You will likely want to add the component to your existing request handlers instead. -->
<requestHandler name="/spellCheckCompRH" class="solr.SearchHandler">
<lst name="defaults">
<!-- omp = Only More Popular -->
@@ -589,6 +590,19 @@
</arr>
</requestHandler>
+ <searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/>
+ <!-- A request handler for working with the tvComponent. This is purely an example.
+ You will likely want to add the component to your existing request handlers instead. -->
+ <requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler">
+ <lst name="defaults">
+ <bool name="tv">true</bool>
+ </lst>
+ <arr name="last-components">
+ <str>tvComponent</str>
+ </arr>
+ </requestHandler>
+
+
<!-- a search component that enables you to configure the top results for
a given query regardless of the normal lucene scoring.-->
<searchComponent name="elevator" class="solr.QueryElevationComponent" >
Added: lucene/solr/trunk/src/java/org/apache/solr/common/params/TermVectorParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/common/params/TermVectorParams.java?rev=707399&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/common/params/TermVectorParams.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/common/params/TermVectorParams.java Thu Oct 23 08:49:18 2008
@@ -0,0 +1,50 @@
+package org.apache.solr.common.params;
+
+
+/**
+ * Names of the request parameters read by the TermVectorComponent. Every name starts with
+ * {@link #TV_PREFIX}. The component reads the option flags as booleans that default to false.
+ **/
+public interface TermVectorParams {
+
+ public static final String TV_PREFIX = "tv.";
+
+ /**
+ * Return Term Frequency info, i.e. how often each term occurs in the field of the document
+ * */
+ public static final String TF = TV_PREFIX + "tf";
+ /**
+ * Return Term Vector position information, if the field stores positions
+ *
+ * */
+ public static final String POSITIONS = TV_PREFIX + "positions";
+ /**
+ * Return offset information, if available
+ * */
+ public static final String OFFSETS = TV_PREFIX + "offsets";
+ /**
+ * Return IDF information. May be expensive.
+ * Note that the value the component reports is the document frequency of the term
+ * (the number of documents containing it), not an inverse.
+ * */
+ public static final String IDF = TV_PREFIX + "idf";
+
+ /**
+ * Return TF-IDF calculation. The component computes it as the term frequency divided by
+ * the document frequency reported for {@link #IDF}, i.e. (tf / df). May be expensive.
+ */
+ public static final String TF_IDF = TV_PREFIX + "tf-idf";
+
+
+ /**
+ * Return all the options: TF, positions, offsets, idf and tf-idf
+ */
+ public static final String ALL = TV_PREFIX + "all";
+
+ /**
+ * The fields to get term vectors for. When it is absent the component falls back to the
+ * regular <code>fl</code> parameter.
+ */
+ public static final String FIELDS = TV_PREFIX + "fl";
+
+ /**
+ * The Doc Ids (Lucene internal ids) of the docs to get the term vectors for
+ */
+ public static final String DOC_IDS = TV_PREFIX + "docIds";
+}
Propchange: lucene/solr/trunk/src/java/org/apache/solr/common/params/TermVectorParams.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermVectorComponent.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermVectorComponent.java?rev=707399&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermVectorComponent.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermVectorComponent.java Thu Oct 23 08:49:18 2008
@@ -0,0 +1,307 @@
+package org.apache.solr.handler.component;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.SetBasedFieldSelector;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermVectorMapper;
+import org.apache.lucene.index.TermVectorOffsetInfo;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.params.TermVectorParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.DocListAndSet;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.RefCounted;
+import org.apache.solr.util.plugin.SolrCoreAware;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.logging.Logger;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Return term vectors for the documents in a query result set.
+ * <p/>
+ * Info available:
+ * term, frequency, position, offset, IDF.
+ * <p/>
+ * <b>Note</b> Returning IDF can be expensive.
+ */
+public class TermVectorComponent extends SearchComponent implements SolrCoreAware {
+ private transient static Logger log = Logger.getLogger(TermVectorComponent.class.getName());
+
+ public static final String COMPONENT_NAME = "tv";
+
+ protected NamedList initParams;
+ public static final String TERM_VECTORS = "termVectors";
+
+
+ public void process(ResponseBuilder rb) throws IOException {
+ SolrParams params = rb.req.getParams();
+ if (!params.getBool(COMPONENT_NAME, false)) {
+ return;
+ }
+
+ NamedList termVectors = new NamedList();
+ rb.rsp.add(TERM_VECTORS, termVectors);
+ //figure out what options we have, and try to get the appropriate vector
+ boolean termFreq = params.getBool(TermVectorParams.TF, false);
+ boolean positions = params.getBool(TermVectorParams.POSITIONS, false);
+ boolean offsets = params.getBool(TermVectorParams.OFFSETS, false);
+ boolean idf = params.getBool(TermVectorParams.IDF, false);
+ boolean tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
+ //boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
+
+ boolean all = params.getBool(TermVectorParams.ALL, false);
+ if (all == true){
+ termFreq = true;
+ positions = true;
+ offsets = true;
+ idf = true;
+ tfIdf = true;
+ }
+
+ String[] fields = params.getParams(TermVectorParams.FIELDS);
+ if (fields == null) {
+ fields = params.getParams(CommonParams.FL);
+ }
+ DocListAndSet listAndSet = rb.getResults();
+ List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
+ Iterator<Integer> iter;
+ if (docIds != null && docIds.isEmpty() == false) {
+ iter = docIds.iterator();
+ } else {
+ DocList list = listAndSet.docList;
+ iter = list.iterator();
+ }
+ SolrCore core = rb.req.getCore();
+ RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
+ try {
+ IndexReader reader = searcher.get().getReader();
+ TVMapper mapper = new TVMapper(fields, reader, termFreq, positions, offsets, idf, tfIdf);
+ IndexSchema schema = core.getSchema();
+ String uniqFieldName = schema.getUniqueKeyField().getName();
+ SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.singleton(uniqFieldName), Collections.emptySet());
+ while (iter.hasNext()) {
+ Integer docId = iter.next();
+ NamedList docNL = new NamedList();
+ termVectors.add("doc-" + docId, docNL);
+ mapper.docNL = docNL;
+ Document document = reader.document(docId, fieldSelector);
+ String uniqId = document.get(uniqFieldName);
+ docNL.add("uniqueKey", uniqId);
+ reader.getTermFreqVector(docId, mapper);
+ }
+ termVectors.add("uniqueKeyFieldName", uniqFieldName);
+ } finally {
+ searcher.decref();
+ }
+ }
+
+ /**
+  * Parses document id parameter values into integers. Each value may hold a single id or a
+  * comma separated list of ids (the form used when the ids are joined into one parameter
+  * value); surrounding whitespace around an id is ignored.
+  *
+  * @param vals the raw parameter values, may be null
+  * @return the parsed ids in the order given, or null if <code>vals</code> is null or empty
+  * @throws SolrException with code BAD_REQUEST if any id is not a valid integer
+  */
+ private List<Integer> getInts(String[] vals) {
+   if (vals == null || vals.length == 0) {
+     return null;
+   }
+   List<Integer> result = new ArrayList<Integer>(vals.length);
+   for (String val : vals) {
+     // one parameter value can carry several ids: "1,2,3"
+     for (String piece : val.split(",")) {
+       try {
+         result.add(Integer.valueOf(piece.trim()));
+       } catch (NumberFormatException e) {
+         throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e.getMessage(), e);
+       }
+     }
+   }
+   return result;
+ }
+
+ /**
+  * Distributed-search hook. During the <code>STAGE_GET_FIELDS</code> stage it groups the
+  * merged result documents by the shard they came from and queues one request per shard,
+  * carrying the original parameters minus <code>q</code> plus a comma separated
+  * <code>tv.docIds</code> list. In every other stage it queues nothing.
+  * <p/>
+  * NOTE(review): the ids sent are <code>ShardDoc.id</code> values; confirm they are what the
+  * receiving shard expects to find in <code>tv.docIds</code>.
+  *
+  * @param rb the builder for the distributed request
+  * @return always {@link ResponseBuilder#STAGE_DONE}
+  * @throws IOException as declared by the method signature
+  */
+ @Override
+ public int distributedProcess(ResponseBuilder rb) throws IOException {
+   if (rb.stage != ResponseBuilder.STAGE_GET_FIELDS) {
+     return ResponseBuilder.STAGE_DONE;
+   }
+
+   // Bucket the result documents by the shard that supplied them.
+   Map<String, Collection<ShardDoc>> docsByShard = new HashMap<String, Collection<ShardDoc>>();
+   for (ShardDoc sdoc : rb.resultIds.values()) {
+     Collection<ShardDoc> bucket = docsByShard.get(sdoc.shard);
+     if (bucket == null) {
+       bucket = new ArrayList<ShardDoc>();
+       docsByShard.put(sdoc.shard, bucket);
+     }
+     bucket.add(sdoc);
+   }
+
+   // Ask each of those shards for the term vectors of its own documents.
+   for (Map.Entry<String, Collection<ShardDoc>> entry : docsByShard.entrySet()) {
+     Collection<ShardDoc> bucket = entry.getValue();
+
+     ArrayList<String> ids = new ArrayList<String>(bucket.size());
+     for (ShardDoc shardDoc : bucket) {
+       ids.add(shardDoc.id.toString());
+     }
+
+     ShardRequest sreq = new ShardRequest();
+     sreq.purpose = ShardRequest.PURPOSE_GET_FIELDS;
+     sreq.shards = new String[]{entry.getKey()};
+     // start from the original parameters, without the query itself
+     sreq.params = new ModifiableSolrParams();
+     sreq.params.add(rb.req.getParams());
+     sreq.params.remove(CommonParams.Q);
+     sreq.params.add(TermVectorParams.DOC_IDS, StrUtils.join(ids, ','));
+
+     rb.addRequest(this, sreq);
+   }
+   return ResponseBuilder.STAGE_DONE;
+ }
+
+ private class TVMapper extends TermVectorMapper {
+ private NamedList docNL;
+ private IndexReader reader;
+ private Set<String> fields;
+ private boolean termFreq, positions, offsets, idf, tfIdf;
+ //internal vars not passed in by construction
+ private boolean map, useOffsets, usePositions;
+ //private Map<String, Integer> idfCache;
+ private NamedList fieldNL;
+ private Term currentTerm;
+
+ public TVMapper(String[] fields, IndexReader reader, boolean termFreq, boolean positions, boolean offsets, boolean idf, boolean tfIdf) {
+
+ this.reader = reader;
+ this.fields = fields != null ? new HashSet<String>(Arrays.asList(fields)) : Collections.<String>emptySet();
+ this.termFreq = termFreq;
+ this.positions = positions;
+ this.offsets = offsets;
+ this.idf = idf;
+ this.tfIdf = tfIdf;
+
+ }
+
+ public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+ if (map == true && fieldNL != null) {
+ NamedList termInfo = new NamedList();
+ fieldNL.add(term, termInfo);
+ if (termFreq == true) {
+ termInfo.add("freq", frequency);
+ }
+ if (useOffsets == true) {
+ NamedList theOffsets = new NamedList();
+ termInfo.add("offsets", theOffsets);
+ for (int i = 0; i < offsets.length; i++) {
+ TermVectorOffsetInfo offset = offsets[i];
+ theOffsets.add("start", offset.getStartOffset());
+ theOffsets.add("end", offset.getEndOffset());
+ }
+ }
+ if (usePositions == true) {
+ NamedList positionsNL = new NamedList();
+ for (int i = 0; i < positions.length; i++) {
+ positionsNL.add("position", positions[i]);
+ }
+ termInfo.add("positions", positionsNL);
+ }
+ if (idf == true) {
+ termInfo.add("idf", getIdf(term));
+ }
+ if (tfIdf == true){
+ double tfIdfVal = ((double) frequency) / getIdf(term);
+ termInfo.add("tf-idf", tfIdfVal);
+ }
+ }
+ }
+
+ private int getIdf(String term) {
+ int result = 1;
+ currentTerm = currentTerm.createTerm(term);
+ try {
+ TermEnum termEnum = reader.terms(currentTerm);
+ if (termEnum != null && termEnum.term().equals(currentTerm)) {
+ result = termEnum.docFreq();
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ return result;
+ }
+
+ public void setExpectations(String field, int numTerms, boolean storeOffsets, boolean storePositions) {
+
+ if (idf == true && reader != null) {
+ this.currentTerm = new Term(field);
+ }
+ useOffsets = storeOffsets && offsets;
+ usePositions = storePositions && positions;
+ if (fields.isEmpty() || fields.contains(field)) {
+ map = true;
+ fieldNL = new NamedList();
+ docNL.add(field, fieldNL);
+ } else {
+ map = false;
+ fieldNL = null;
+ }
+ }
+ }
+
+ /**
+  * Intentionally empty: this component does no work in the prepare phase.
+  */
+ public void prepare(ResponseBuilder rb) throws IOException {
+
+ }
+
+ //////////////////////// NamedListInitializedPlugin methods //////////////////////
+ /**
+  * Forwards the configured arguments to the superclass and keeps a reference to them in
+  * {@link #initParams}.
+  *
+  * @param args the arguments this component was configured with
+  */
+ @Override
+ public void init(NamedList args) {
+ super.init(args);
+ this.initParams = args;
+ }
+
+ /**
+  * Intentionally empty: nothing is done with the core that is passed in.
+  */
+ public void inform(SolrCore core) {
+
+ }
+
+ /**
+  * @return the literal revision keyword string; presumably meant to be expanded by
+  *         version-control keyword substitution
+  */
+ public String getVersion() {
+ return "$Revision$";
+ }
+
+ /**
+  * @return the literal id keyword string; presumably meant to be expanded by
+  *         version-control keyword substitution
+  */
+ public String getSourceId() {
+ return "$Id:$";
+ }
+
+ /**
+  * NOTE(review): this returns a Revision keyword, the same kind getVersion() returns;
+  * confirm whether a URL keyword was intended here.
+  *
+  * @return the literal keyword string
+  */
+ public String getSource() {
+ return "$Revision:$";
+ }
+
+ /**
+  * @return a short human readable description of this component
+  */
+ public String getDescription() {
+ return "A Component for working with Term Vectors";
+ }
+}
Propchange: lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermVectorComponent.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java?rev=707399&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java (added)
+++ lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java Thu Oct 23 08:49:18 2008
@@ -0,0 +1,211 @@
+package org.apache.solr.handler.component;
+
+import org.apache.solr.util.AbstractSolrTestCase;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.TermVectorParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.request.SolrRequestHandler;
+import org.apache.solr.request.SolrQueryResponse;
+import org.apache.solr.request.LocalSolrQueryRequest;
+
+import java.util.HashMap;
+import java.util.ArrayList;
+import java.util.Arrays;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * Tests for TermVectorComponent. The tests run requests through the "tvrh" request handler
+ * against ten small documents indexed into the "test_posofftv" field, and call
+ * distributedProcess directly with a hand-built ResponseBuilder.
+ **/
+public class TermVectorComponentTest extends AbstractSolrTestCase {
+ @Override
+ public String getSchemaFile() {
+ return "schema.xml";
+ }
+
+ @Override
+ public String getSolrConfigFile() {
+ return "solrconfig.xml";
+ }
+
+ /**
+  * Indexes documents with ids 0 to 9 into the "test_posofftv" field and commits them.
+  */
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ assertU(adoc("id", "0", "test_posofftv", "This is a title and another title"));
+ assertU(adoc("id", "1", "test_posofftv",
+ "The quick reb fox jumped over the lazy brown dogs."));
+ assertU(adoc("id", "2", "test_posofftv", "This is a document"));
+ assertU(adoc("id", "3", "test_posofftv", "another document"));
+ //bunch of docs that are variants on blue
+ assertU(adoc("id", "4", "test_posofftv", "blue"));
+ assertU(adoc("id", "5", "test_posofftv", "blud"));
+ assertU(adoc("id", "6", "test_posofftv", "boue"));
+ assertU(adoc("id", "7", "test_posofftv", "glue"));
+ assertU(adoc("id", "8", "test_posofftv", "blee"));
+ assertU(adoc("id", "9", "test_posofftv", "blah"));
+
+ assertU("commit", commit());
+ }
+
+ /**
+  * Requests term frequencies for document id:0 and checks the shape of the response:
+  * a document entry with two children, a field entry with two terms, a frequency of 2
+  * for "titl", and "id" reported as the unique key field name.
+  */
+ public void testBasics() throws Exception {
+ SolrCore core = h.getCore();
+ SearchComponent tvComp = core.getSearchComponent("tvComponent");
+ assertTrue("tvComp is null and it shouldn't be", tvComp != null);
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.add(CommonParams.Q, "id:0");
+ params.add(CommonParams.QT, "tvrh");
+ params.add(TermVectorParams.TF, "true");
+ params.add(TermVectorComponent.COMPONENT_NAME, "true");
+ SolrRequestHandler handler = core.getRequestHandler("tvrh");
+ SolrQueryResponse rsp;
+ rsp = new SolrQueryResponse();
+ rsp.add("responseHeader", new SimpleOrderedMap());
+ handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+ NamedList values = rsp.getValues();
+ NamedList termVectors = (NamedList) values.get(TermVectorComponent.TERM_VECTORS);
+ assertTrue("termVectors is null and it shouldn't be", termVectors != null);
+ System.out.println("TVs:" + termVectors);
+ // first entry: the single matching document (its unique key plus one field)
+ NamedList doc = (NamedList) termVectors.getVal(0);
+ assertTrue("doc is null and it shouldn't be", doc != null);
+ assertTrue(doc.size() + " does not equal: " + 2, doc.size() == 2);
+ NamedList field = (NamedList) doc.get("test_posofftv");
+ assertTrue("field is null and it shouldn't be", field != null);
+ assertTrue(field.size() + " does not equal: " + 2, field.size() == 2);
+ // "titl" is presumably the analyzed form of "title", which occurs twice in document 0
+ NamedList titl = (NamedList) field.get("titl");
+ assertTrue("titl is null and it shouldn't be", titl != null);
+ assertTrue(titl.get("freq") + " does not equal: " + 2, ((Integer) titl.get("freq")) == 2);
+
+ // second entry: the name of the unique key field
+ String uniqueKeyFieldName = (String) termVectors.getVal(1);
+ assertTrue("uniqueKeyFieldName is null and it shouldn't be", uniqueKeyFieldName != null);
+ assertTrue(uniqueKeyFieldName + " is not equal to " + "id", uniqueKeyFieldName.equals("id") == true);
+
+ }
+
+ /**
+  * Turns on tf, idf, offsets, positions and tf-idf for document id:0 and checks only that
+  * the document entry still has two children.
+  */
+ public void testOptions() throws Exception {
+ SolrCore core = h.getCore();
+ SearchComponent tvComp = core.getSearchComponent("tvComponent");
+ assertTrue("tvComp is null and it shouldn't be", tvComp != null);
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.add(CommonParams.Q, "id:0");
+ params.add(CommonParams.QT, "tvrh");
+ params.add(TermVectorParams.TF, "true");
+ params.add(TermVectorParams.IDF, "true");
+ params.add(TermVectorParams.OFFSETS, "true");
+ params.add(TermVectorParams.POSITIONS, "true");
+ params.add(TermVectorParams.TF_IDF, "true");
+ params.add(TermVectorComponent.COMPONENT_NAME, "true");
+
+ SolrRequestHandler handler = core.getRequestHandler("tvrh");
+ SolrQueryResponse rsp;
+ rsp = new SolrQueryResponse();
+ rsp.add("responseHeader", new SimpleOrderedMap());
+ handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+ NamedList values = rsp.getValues();
+ NamedList termVectors = (NamedList) values.get(TermVectorComponent.TERM_VECTORS);
+ assertTrue("termVectors is null and it shouldn't be", termVectors != null);
+ System.out.println("TVs: " + termVectors);
+ NamedList doc = (NamedList) termVectors.getVal(0);
+ assertTrue("doc is null and it shouldn't be", doc != null);
+ assertTrue(doc.size() + " does not equal: " + 2, doc.size() == 2);
+ }
+
+
+ /**
+  * Restricts tv.fl to a field the document does not have and expects the document entry
+  * to contain a single child.
+  */
+ public void testNoFields() throws Exception {
+ SolrCore core = h.getCore();
+ SearchComponent tvComp = core.getSearchComponent("tvComponent");
+ assertTrue("tvComp is null and it shouldn't be", tvComp != null);
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ params.add(CommonParams.Q, "id:0");
+ params.add(CommonParams.QT, "tvrh");
+ params.add(TermVectorParams.TF, "true");
+ //Pass in a field that doesn't exist on the doc, thus, no vectors should be returned
+ params.add(TermVectorParams.FIELDS, "foo");
+ params.add(TermVectorComponent.COMPONENT_NAME, "true");
+ SolrRequestHandler handler = core.getRequestHandler("tvrh");
+ SolrQueryResponse rsp;
+ rsp = new SolrQueryResponse();
+ rsp.add("responseHeader", new SimpleOrderedMap());
+ handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
+ NamedList values = rsp.getValues();
+ NamedList termVectors = (NamedList) values.get(TermVectorComponent.TERM_VECTORS);
+ assertTrue("termVectors is null and it shouldn't be", termVectors != null);
+ NamedList doc = (NamedList) termVectors.getVal(0);
+ assertTrue("doc is null and it shouldn't be", doc != null);
+ assertTrue(doc.size() + " does not equal: " + 1, doc.size() == 1);
+ }
+
+ /**
+  * Calls distributedProcess directly on a hand-built ResponseBuilder in the GET_FIELDS
+  * stage, with one result document per shard, and expects STAGE_DONE plus one outgoing
+  * request per shard. No shard is actually contacted.
+  */
+ public void testDistributed() throws Exception {
+ SolrCore core = h.getCore();
+ TermVectorComponent tvComp = (TermVectorComponent) core.getSearchComponent("tvComponent");
+ assertTrue("tvComp is null and it shouldn't be", tvComp != null);
+ ModifiableSolrParams params = new ModifiableSolrParams();
+ ResponseBuilder rb = new ResponseBuilder();
+ rb.stage = ResponseBuilder.STAGE_GET_FIELDS;
+ rb.shards = new String[]{"localhost:0", "localhost:1", "localhost:2", "localhost:3"};//we don't actually call these, since we are going to invoke distributedProcess directly
+ rb.resultIds = new HashMap<Object, ShardDoc>();
+ rb.components = new ArrayList<SearchComponent>();
+ rb.components.add(tvComp);
+ params.add(CommonParams.Q, "id:0");
+ params.add(CommonParams.QT, "tvrh");
+ params.add(TermVectorParams.TF, "true");
+ params.add(TermVectorParams.IDF, "true");
+ params.add(TermVectorParams.OFFSETS, "true");
+ params.add(TermVectorParams.POSITIONS, "true");
+ params.add(TermVectorComponent.COMPONENT_NAME, "true");
+ rb.req = new LocalSolrQueryRequest(core, params);
+ rb.outgoing = new ArrayList<ShardRequest>();
+ //one doc per shard, but make sure there are enough docs to go around
+ for (int i = 0; i < rb.shards.length; i++){
+ ShardDoc doc = new ShardDoc();
+ doc.id = i; //must be a valid doc that was indexed.
+ doc.score = 1 - (i / (float)rb.shards.length);
+ doc.positionInResponse = i;
+ doc.shard = rb.shards[i];
+ doc.orderInShard = 0;
+ rb.resultIds.put(doc.id, doc);
+ }
+
+ int result = tvComp.distributedProcess(rb);
+ assertTrue(result + " does not equal: " + ResponseBuilder.STAGE_DONE, result == ResponseBuilder.STAGE_DONE);
+ //one outgoing per shard
+ assertTrue("rb.outgoing Size: " + rb.outgoing.size() + " is not: " + rb.shards.length, rb.outgoing.size() == rb.shards.length);
+ for (ShardRequest request : rb.outgoing) {
+ ModifiableSolrParams solrParams = request.params;
+ System.out.println("Shard: " + Arrays.asList(request.shards) + " Params: " + solrParams);
+ }
+ }
+
+}
+
+
+
+
+
+/*
+* <field name="test_basictv" type="text" termVectors="true"/>
+ <field name="test_notv" type="text" termVectors="false"/>
+ <field name="test_postv" type="text" termVectors="true" termPositions="true"/>
+ <field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
+ <field name="test_posofftv" type="text" termVectors="true"
+ termPositions="true" termOffsets="true"/>
+*
+* */
Propchange: lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml?rev=707399&r1=707398&r2=707399&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml (original)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig.xml Thu Oct 23 08:49:18 2008
@@ -369,6 +369,18 @@
</arr>
</requestHandler>
+
+ <searchComponent name="tvComponent" class="org.apache.solr.handler.component.TermVectorComponent"/>
+
+ <requestHandler name="tvrh" class="org.apache.solr.handler.component.SearchHandler">
+ <lst name="defaults">
+
+ </lst>
+ <arr name="last-components">
+ <str>tvComponent</str>
+ </arr>
+ </requestHandler>
+
<highlighting>
<!-- Configure the standard fragmenter -->
<fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">