You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2012/06/11 23:08:48 UTC
svn commit: r1349012 - in /lucene/dev/trunk/solr: CHANGES.txt
core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java
Author: hossman
Date: Mon Jun 11 21:08:48 2012
New Revision: 1349012
URL: http://svn.apache.org/viewvc?rev=1349012&view=rev
Log:
SOLR-3537: Fixed TermVectorComponent so that it will not fail if the fl param contains globs or pseudo-fields
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1349012&r1=1349011&r2=1349012&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Mon Jun 11 21:08:48 2012
@@ -491,6 +491,9 @@ Bug Fixes
* SOLR-2923: IllegalArgumentException when using useFilterForSortedQuery on an
empty index. (Adrien Grand via Mark Miller)
+* SOLR-3537: Fixed TermVectorComponent so that it will not fail if the fl
+ param contains globs or pseudo-fields (hossman)
+
Other Changes
----------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java?rev=1349012&r1=1349011&r2=1349012&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java Mon Jun 11 21:08:48 2012
@@ -1,9 +1,13 @@
package org.apache.solr.handler.component;
import java.io.IOException;
+import java.util.Arrays;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.LinkedHashSet;
+import java.util.Set;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -27,6 +31,7 @@ import org.apache.solr.common.util.StrUt
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
+import org.apache.solr.search.ReturnFields;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.SolrIndexSearcher;
@@ -81,6 +86,51 @@ public class TermVectorComponent extends
protected NamedList initParams;
public static final String TERM_VECTORS = "termVectors";
+ /**
+ * Helper method for determining the list of fields that we should
+ * try to find term vectors on.
+ * <p>
+ * Does simple (non-glob-supporting) parsing on the
+ * {@link TermVectorParams#FIELDS} param if specified, otherwise it returns
+ * the concrete field values specified in {@link CommonParams#FL} --
+ * ignoring functions, transformers, or literals.
+ * </p>
+ *
+ * @param rb the response builder whose request params are inspected
+ * @return <code>null</code> if "fl=*" is used or neither param is
+ * specified; the empty set if the "fl" specified consisted entirely
+ * of things that are not real fields (i.e. functions, transformers,
+ * partial-globs, score, etc...) and so are not supported by this
+ * component; otherwise the field names to look up
+ */
+ private Set<String> getFields(ResponseBuilder rb) {
+ SolrParams params = rb.req.getParams();
+ String[] fldLst = params.getParams(TermVectorParams.FIELDS);
+ if (null == fldLst || 0 == fldLst.length ||
+ (1 == fldLst.length && 0 == fldLst[0].length())) {
+
+ // no usable tv.fl (absent, or a single empty value): parse the main fl
+ ReturnFields rf = new ReturnFields
+ (params.getParams(CommonParams.FL), rb.req);
+
+ if (rf.wantsAllFields()) {
+ return null;
+ }
+
+ Set<String> fieldNames = rf.getLuceneFieldNames();
+ return (null != fieldNames) ?
+ fieldNames :
+ // return empty set indicating no fields should be used
+ Collections.<String>emptySet();
+ }
+
+ // otherwise use the raw fldLst as is, no special parsing or globs
+ Set<String> fieldNames = new LinkedHashSet<String>();
+ for (String fl : fldLst) {
+ fieldNames.addAll(Arrays.asList(SolrPluginUtils.split(fl)));
+ }
+ return fieldNames;
+ }
@Override
public void process(ResponseBuilder rb) throws IOException {
@@ -108,11 +158,6 @@ public class TermVectorComponent extends
allFields.tfIdf = true;
}
- String fldLst = params.get(TermVectorParams.FIELDS);
- if (fldLst == null) {
- fldLst = params.get(CommonParams.FL);
- }
-
//use this to validate our fields
IndexSchema schema = rb.req.getSchema();
//Build up our per field mapping
@@ -122,10 +167,14 @@ public class TermVectorComponent extends
List<String> noPos = new ArrayList<String>();
List<String> noOff = new ArrayList<String>();
- //we have specific fields to retrieve
- if (fldLst != null) {
- String [] fields = SolrPluginUtils.split(fldLst);
+ Set<String> fields = getFields(rb);
+ if ( null != fields ) {
+ //we have specific fields to retrieve, or no fields
for (String field : fields) {
+
+ // workaround for SOLR-3523
+ if (null == field || "score".equals(field)) continue;
+
SchemaField sf = schema.getFieldOrNull(field);
if (sf != null) {
if (sf.storeTermVector()) {
@@ -240,7 +289,7 @@ public class TermVectorComponent extends
termVectors.add("uniqueKeyFieldName", uniqFieldName);
}
}
- if (!fieldOptions.isEmpty()) {
+ if ( null != fields ) {
for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
final String field = entry.getKey();
final Terms vector = reader.getTermVector(docId, field);
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java?rev=1349012&r1=1349011&r2=1349012&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java Mon Jun 11 21:08:48 2012
@@ -128,6 +128,73 @@ public class TermVectorComponentTest ext
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
);
+ // tv.fl diff from fl
+ assertJQ(req("json.nl","map",
+ "qt",tv,
+ "q", "id:0",
+ "fl", "*,score",
+ "tv.fl", "test_basictv,test_offtv",
+ TermVectorComponent.COMPONENT_NAME, "true",
+ TermVectorParams.TF, "true")
+ ,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
+ " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
+ " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
+ " 'uniqueKeyFieldName':'id'}"
+ );
+ // multi-valued tv.fl
+ assertJQ(req("json.nl","map",
+ "qt",tv,
+ "q", "id:0",
+ "fl", "*,score",
+ "tv.fl", "test_basictv",
+ "tv.fl","test_offtv",
+ TermVectorComponent.COMPONENT_NAME, "true",
+ TermVectorParams.TF, "true")
+ ,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
+ " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
+ " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
+ " 'uniqueKeyFieldName':'id'}"
+ );
+ // re-use fl glob
+ assertJQ(req("json.nl","map",
+ "qt",tv,
+ "q", "id:0",
+ "fl", "*,score",
+ TermVectorComponent.COMPONENT_NAME, "true",
+ TermVectorParams.TF, "true")
+ ,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
+ " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
+ " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
+ " 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
+ " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
+ " 'uniqueKeyFieldName':'id'}"
+ );
+ // re-use fl, ignore things we can't handle
+ assertJQ(req("json.nl","map",
+ "qt",tv,
+ "q", "id:0",
+ "fl", "score,test_basictv,[docid],test_postv,val:sum(3,4)",
+ TermVectorComponent.COMPONENT_NAME, "true",
+ TermVectorParams.TF, "true")
+ ,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
+ " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
+ " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
+ " 'uniqueKeyFieldName':'id'}"
+ );
+ // re-use (multi-valued) fl, ignore things we can't handle
+ assertJQ(req("json.nl","map",
+ "qt",tv,
+ "q", "id:0",
+ "fl", "score,test_basictv",
+ "fl", "[docid],test_postv,val:sum(3,4)",
+ TermVectorComponent.COMPONENT_NAME, "true",
+ TermVectorParams.TF, "true")
+ ,"/termVectors=={'doc-0':{'uniqueKey':'0'," +
+ " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
+ " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
+ " 'uniqueKeyFieldName':'id'}"
+ );
+
}
@Test