You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2015/03/27 03:10:01 UTC
svn commit: r1669494 - in /lucene/dev/branches/branch_5x: ./ solr/
solr/core/ solr/core/src/java/org/apache/solr/handler/admin/
solr/core/src/java/org/apache/solr/handler/component/
solr/core/src/java/org/apache/solr/schema/ solr/core/src/test-files/so...
Author: dsmiley
Date: Fri Mar 27 02:10:00 2015
New Revision: 1669494
URL: http://svn.apache.org/r1669494
Log:
SOLR-5911: term vector payload support
in schema & TermVectorComponent & LukeRequestHandler
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/solr/ (props changed)
lucene/dev/branches/branch_5x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/solr/core/ (props changed)
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/EnumField.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldType.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/SchemaField.java
lucene/dev/branches/branch_5x/solr/core/src/test-files/solr/collection1/conf/schema.xml
lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java
lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java
lucene/dev/branches/branch_5x/solr/server/ (props changed)
lucene/dev/branches/branch_5x/solr/server/solr/configsets/sample_techproducts_configs/conf/schema.xml
lucene/dev/branches/branch_5x/solr/solrj/ (props changed)
lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/luke/FieldFlag.java
lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/TermVectorParams.java
Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Fri Mar 27 02:10:00 2015
@@ -178,6 +178,10 @@ New Features
* SOLR-7240: '/' redirects to '/solr/' for convenience (Martijn Koster, hossman)
+* SOLR-5911: Added payload support for term vectors. New "termPayloads" option for fields
+ / types in the schema, and "tv.payloads" param for the term vector component.
+ (Mike McCandless, David Smiley)
+
Bug Fixes
----------------------
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java Fri Mar 27 02:10:00 2015
@@ -20,7 +20,16 @@ package org.apache.solr.handler.admin;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharFilterFactory;
@@ -28,9 +37,20 @@ import org.apache.lucene.analysis.util.T
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SegmentReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
@@ -49,18 +69,18 @@ import org.apache.solr.common.util.Simpl
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.FieldType;
-import org.apache.solr.update.SolrIndexWriter;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
-import org.apache.solr.schema.CopyField;
import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.update.SolrIndexWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
-import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS;
import static org.apache.lucene.index.IndexOptions.DOCS;
+import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS;
+import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
/**
* This handler exposes the internal lucene index. It is inspired by and
@@ -186,6 +206,7 @@ public class LukeRequestHandler extends
flags.append( (f != null && f.fieldType().storeTermVectors()) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().storeTermVectorOffsets()) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().storeTermVectorPositions()) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
+ flags.append( (f != null && f.fieldType().storeTermVectorPayloads()) ? FieldFlag.TERM_VECTOR_PAYLOADS.getAbbreviation() : '-' );
flags.append( (f != null && f.fieldType().omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
flags.append( (f != null && DOCS == opts ) ?
@@ -224,6 +245,7 @@ public class LukeRequestHandler extends
flags.append( (f != null && f.storeTermVector() ) ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermOffsets() ) ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
flags.append( (f != null && f.storeTermPositions() ) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
+ flags.append( (f != null && f.storeTermPayloads() ) ? FieldFlag.TERM_VECTOR_PAYLOADS.getAbbreviation() : '-' );
flags.append( (f != null && f.omitNorms()) ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
flags.append( (f != null &&
f.omitTermFreqAndPositions() ) ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java Fri Mar 27 02:10:00 2015
@@ -11,10 +11,10 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
@@ -24,6 +24,7 @@ import org.apache.solr.common.SolrExcept
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.TermVectorParams;
+import org.apache.solr.common.util.Base64;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
@@ -58,7 +59,7 @@ import org.apache.solr.util.plugin.SolrC
* Return term vectors for the documents in a query result set.
* <p>
* Info available:
- * term, frequency, position, offset, IDF.
+ * term, frequency, position, offset, payloads, IDF.
* <p>
* <b>Note</b> Returning IDF can be expensive.
*
@@ -153,6 +154,7 @@ public class TermVectorComponent extends
allFields.termFreq = params.getBool(TermVectorParams.TF, false);
allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
+ allFields.payloads = params.getBool(TermVectorParams.PAYLOADS, false);
allFields.docFreq = params.getBool(TermVectorParams.DF, false);
allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
//boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
@@ -161,6 +163,7 @@ public class TermVectorComponent extends
allFields.termFreq = true;
allFields.positions = true;
allFields.offsets = true;
+ allFields.payloads = true;
allFields.docFreq = true;
allFields.tfIdf = true;
}
@@ -171,6 +174,7 @@ public class TermVectorComponent extends
List<String> noTV = new ArrayList<>();
List<String> noPos = new ArrayList<>();
List<String> noOff = new ArrayList<>();
+ List<String> noPay = new ArrayList<>();
Set<String> fields = getFields(rb);
if ( null != fields ) {
@@ -207,6 +211,10 @@ public class TermVectorComponent extends
if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey){
noOff.add(field);
}
+ option.payloads = params.getFieldBool(field, TermVectorParams.PAYLOADS, allFields.payloads);
+ if (option.payloads && !sf.storeTermPayloads() && !fieldIsUniqueKey){
+ noPay.add(field);
+ }
} else {//field doesn't have term vectors
if (!fieldIsUniqueKey) noTV.add(field);
}
@@ -234,6 +242,10 @@ public class TermVectorComponent extends
warnings.add("noOffsets", noOff);
hasWarnings = true;
}
+ if (!noPay.isEmpty()) {
+ warnings.add("noPayloads", noPay);
+ hasWarnings = true;
+ }
if (hasWarnings) {
termVectors.add("warnings", warnings);
}
@@ -341,22 +353,27 @@ public class TermVectorComponent extends
termInfo.add("tf", freq);
}
- dpEnum = termsEnum.postings(null, dpEnum, PostingsEnum.ALL);
- boolean useOffsets = false;
- boolean usePositions = false;
+ int dpEnumFlags = 0;
+ dpEnumFlags |= fieldOptions.positions ? PostingsEnum.POSITIONS : 0;
+ //payloads require offsets
+ dpEnumFlags |= (fieldOptions.offsets || fieldOptions.payloads) ? PostingsEnum.OFFSETS : 0;
+ dpEnumFlags |= fieldOptions.payloads ? PostingsEnum.PAYLOADS : 0;
+ dpEnum = termsEnum.postings(null, dpEnum, dpEnumFlags);
+
+ boolean atNextDoc = false;
if (dpEnum != null) {
dpEnum.nextDoc();
- usePositions = fieldOptions.positions;
- useOffsets = fieldOptions.offsets;
+ atNextDoc = true;
}
- NamedList<Integer> positionsNL = null;
- NamedList<Number> theOffsets = null;
+ if (atNextDoc && dpEnumFlags != 0) {
+ NamedList<Integer> positionsNL = null;
+ NamedList<Number> theOffsets = null;
+ NamedList<String> thePayloads = null;
- if (usePositions || useOffsets) {
for (int i = 0; i < freq; i++) {
final int pos = dpEnum.nextPosition();
- if (usePositions && pos >= 0) {
+ if (fieldOptions.positions && pos >= 0) {
if (positionsNL == null) {
positionsNL = new NamedList<>();
termInfo.add("positions", positionsNL);
@@ -364,19 +381,24 @@ public class TermVectorComponent extends
positionsNL.add("position", pos);
}
- if (useOffsets && theOffsets == null) {
- if (dpEnum.startOffset() == -1) {
- useOffsets = false;
- } else {
+ int startOffset = fieldOptions.offsets ? dpEnum.startOffset() : -1;
+ if (startOffset >= 0) {
+ if (theOffsets == null) {
theOffsets = new NamedList<>();
termInfo.add("offsets", theOffsets);
}
- }
-
- if (theOffsets != null) {
theOffsets.add("start", dpEnum.startOffset());
theOffsets.add("end", dpEnum.endOffset());
}
+
+ BytesRef payload = fieldOptions.payloads ? dpEnum.getPayload() : null;
+ if (payload != null) {
+ if (thePayloads == null) {
+ thePayloads = new NamedList<>();
+ termInfo.add("payloads", thePayloads);
+ }
+ thePayloads.add("payload", Base64.byteArrayToBase64(payload.bytes, payload.offset, payload.length));
+ }
}
}
@@ -472,5 +494,5 @@ public class TermVectorComponent extends
class FieldOptions {
String fieldName;
- boolean termFreq, positions, offsets, docFreq, tfIdf;
+ boolean termFreq, positions, offsets, payloads, docFreq, tfIdf;
}
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/EnumField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/EnumField.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/EnumField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/EnumField.java Fri Mar 27 02:10:00 2015
@@ -17,6 +17,21 @@ package org.apache.solr.schema;
* limitations under the License.
*/
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
@@ -24,7 +39,11 @@ import org.apache.lucene.index.IndexOpti
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.EnumFieldSource;
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocValuesRangeQuery;
+import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
@@ -42,17 +61,6 @@ import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
-import javax.xml.xpath.XPathExpressionException;
-import javax.xml.xpath.XPathFactory;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.*;
-
/***
* Field type for support of string values with custom sort order.
*/
@@ -388,6 +396,7 @@ public class EnumField extends Primitive
newType.setStoreTermVectors(field.storeTermVector());
newType.setStoreTermVectorOffsets(field.storeTermOffsets());
newType.setStoreTermVectorPositions(field.storeTermPositions());
+ newType.setStoreTermVectorPayloads(field.storeTermPayloads());
newType.setNumericType(FieldType.NumericType.INT);
newType.setNumericPrecisionStep(DEFAULT_PRECISION_STEP);
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java Fri Mar 27 02:10:00 2015
@@ -17,8 +17,8 @@
package org.apache.solr.schema;
-import java.util.Map;
import java.util.HashMap;
+import java.util.Map;
/**
*
@@ -52,13 +52,15 @@ public abstract class FieldProperties {
protected final static int STORE_OFFSETS = 0x00004000;
protected final static int DOC_VALUES = 0x00008000;
+ protected final static int STORE_TERMPAYLOADS = 0x00010000;
+
static final String[] propertyNames = {
"indexed", "tokenized", "stored",
"binary", "omitNorms", "omitTermFreqAndPositions",
"termVectors", "termPositions", "termOffsets",
"multiValued",
"sortMissingFirst","sortMissingLast","required", "omitPositions",
- "storeOffsetsWithPositions", "docValues"
+ "storeOffsetsWithPositions", "docValues", "termPayloads"
};
static final Map<String,Integer> propertyMap = new HashMap<>();
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldType.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldType.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldType.java Fri Mar 27 02:10:00 2015
@@ -17,8 +17,6 @@
package org.apache.solr.schema;
-import static org.apache.lucene.analysis.util.AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM;
-
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
@@ -67,6 +65,8 @@ import org.apache.solr.search.Sorting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.lucene.analysis.util.AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM;
+
/**
* Base class for all field types used by an index schema.
*
@@ -268,6 +268,7 @@ public abstract class FieldType extends
newType.setStoreTermVectors(field.storeTermVector());
newType.setStoreTermVectorOffsets(field.storeTermOffsets());
newType.setStoreTermVectorPositions(field.storeTermPositions());
+ newType.setStoreTermVectorPayloads(field.storeTermPayloads());
return createField(field.getName(), val, newType, boost);
}
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java Fri Mar 27 02:10:00 2015
@@ -33,7 +33,6 @@ import org.apache.lucene.index.Indexable
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.uninverting.UninvertingReader.Type;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.AttributeSource;
@@ -178,6 +177,7 @@ public class PreAnalyzedField extends Fi
newType.setStoreTermVectors(field.storeTermVector());
newType.setStoreTermVectorOffsets(field.storeTermOffsets());
newType.setStoreTermVectorPositions(field.storeTermPositions());
+ newType.setStoreTermVectorPayloads(field.storeTermPayloads());
return newType;
}
Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/SchemaField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/SchemaField.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/SchemaField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/SchemaField.java Fri Mar 27 02:10:00 2015
@@ -17,19 +17,18 @@
package org.apache.solr.schema;
-import org.apache.solr.common.SolrException;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.search.SortField;
-import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.search.QParser;
-
-import org.apache.solr.response.TextResponseWriter;
-
+import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.io.IOException;
+
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.search.SortField;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.response.TextResponseWriter;
+import org.apache.solr.search.QParser;
/**
* Encapsulates all information about a Field in a Solr Schema
@@ -94,6 +93,7 @@ public final class SchemaField extends F
public boolean storeTermVector() { return (properties & STORE_TERMVECTORS)!=0; }
public boolean storeTermPositions() { return (properties & STORE_TERMPOSITIONS)!=0; }
public boolean storeTermOffsets() { return (properties & STORE_TERMOFFSETS)!=0; }
+ public boolean storeTermPayloads() { return (properties & STORE_TERMPAYLOADS)!=0; }
public boolean omitNorms() { return (properties & OMIT_NORMS)!=0; }
public boolean omitTermFreqAndPositions() { return (properties & OMIT_TF_POSITIONS)!=0; }
@@ -236,7 +236,7 @@ public final class SchemaField extends F
if (on(falseProps,INDEXED)) {
int pp = (INDEXED
- | STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
+ | STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS | STORE_TERMPAYLOADS);
if (on(pp,trueProps)) {
throw new RuntimeException("SchemaField: " + name + " conflicting 'true' field options for non-indexed field:" + props);
}
@@ -269,7 +269,7 @@ public final class SchemaField extends F
}
if (on(falseProps,STORE_TERMVECTORS)) {
- int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
+ int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS | STORE_TERMPAYLOADS);
if (on(pp,trueProps)) {
throw new RuntimeException("SchemaField: " + name + " conflicting termvector field options:" + props);
}
@@ -323,6 +323,7 @@ public final class SchemaField extends F
properties.add(getPropertyName(STORE_TERMVECTORS), storeTermVector());
properties.add(getPropertyName(STORE_TERMPOSITIONS), storeTermPositions());
properties.add(getPropertyName(STORE_TERMOFFSETS), storeTermOffsets());
+ properties.add(getPropertyName(STORE_TERMPAYLOADS), storeTermPayloads());
properties.add(getPropertyName(OMIT_NORMS), omitNorms());
properties.add(getPropertyName(OMIT_TF_POSITIONS), omitTermFreqAndPositions());
properties.add(getPropertyName(OMIT_POSITIONS), omitPositions());
Modified: lucene/dev/branches/branch_5x/solr/core/src/test-files/solr/collection1/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test-files/solr/collection1/conf/schema.xml?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test-files/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test-files/solr/collection1/conf/schema.xml Fri Mar 27 02:10:00 2015
@@ -103,6 +103,16 @@
</analyzer>
</fieldType>
+ <fieldtype name="text_payload_tv" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StandardFilterFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
+ <filter class="org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilterFactory"/>
+ </analyzer>
+ </fieldtype>
<fieldType name="nametext" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
@@ -478,6 +488,8 @@
<field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
<field name="test_posofftv" type="text" termVectors="true"
termPositions="true" termOffsets="true"/>
+ <field name="test_posoffpaytv" type="text_payload_tv" termVectors="true"
+ termPositions="true" termOffsets="true" termPayloads="true"/>
<!-- test highlit field settings -->
<field name="test_hlt" type="highlittext" indexed="true"/>
Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java Fri Mar 27 02:10:00 2015
@@ -17,6 +17,8 @@
package org.apache.solr;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringWriter;
@@ -26,18 +28,10 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LazyDocument;
-import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext.Context;
-import org.apache.lucene.store.MockDirectoryWrapper;
-import org.apache.lucene.util.English;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams;
@@ -56,10 +50,7 @@ import org.apache.solr.schema.IndexSchem
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
-import org.apache.solr.update.DirectUpdateHandler2;
-import org.apache.solr.util.RefCounted;
import org.junit.BeforeClass;
-import org.junit.Ignore;
import org.junit.Test;
/**
@@ -540,6 +531,11 @@ public class BasicFunctionalityTest exte
assertTrue(f.storeTermVector() && f.storeTermPositions() && f.storeTermOffsets());
assertTrue(luf.fieldType().storeTermVectorOffsets() && luf.fieldType().storeTermVectorPositions());
+ f = ischema.getField("test_posoffpaytv");
+ luf = f.createField("test", 0f);
+ assertTrue(f.storeTermVector() && f.storeTermPositions() && f.storeTermOffsets() && f.storeTermPayloads());
+ assertTrue(luf.fieldType().storeTermVectorOffsets() && luf.fieldType().storeTermVectorPositions() && luf.fieldType().storeTermVectorPayloads());
+
}
@Test
Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java Fri Mar 27 02:10:00 2015
@@ -1,19 +1,13 @@
package org.apache.solr.handler.component;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.TermVectorParams;
-import org.apache.solr.request.LocalSolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
import org.junit.BeforeClass;
import org.junit.Test;
-
-import java.util.HashMap;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -42,6 +36,7 @@ public class TermVectorComponentTest ext
initCore("solrconfig.xml","schema.xml");
assertU(adoc("id", "0",
+ "test_posoffpaytv", "This is a title and another title",
"test_posofftv", "This is a title and another title",
"test_basictv", "This is a title and another title",
"test_notv", "This is a title and another title",
@@ -49,6 +44,7 @@ public class TermVectorComponentTest ext
"test_offtv", "This is a title and another title"
));
assertU(adoc("id", "1",
+ "test_posoffpaytv", "The quick reb fox jumped over the lazy brown dogs.",
"test_posofftv", "The quick reb fox jumped over the lazy brown dogs.",
"test_basictv", "The quick reb fox jumped over the lazy brown dogs.",
"test_notv", "The quick reb fox jumped over the lazy brown dogs.",
@@ -56,6 +52,7 @@ public class TermVectorComponentTest ext
"test_offtv", "The quick reb fox jumped over the lazy brown dogs."
));
assertU(adoc("id", "2",
+ "test_posoffpaytv", "This is a document",
"test_posofftv", "This is a document",
"test_basictv", "This is a document",
"test_notv", "This is a document",
@@ -63,6 +60,7 @@ public class TermVectorComponentTest ext
"test_offtv", "This is a document"
));
assertU(adoc("id", "3",
+ "test_posoffpaytv", "another document",
"test_posofftv", "another document",
"test_basictv", "another document",
"test_notv", "another document",
@@ -71,6 +69,7 @@ public class TermVectorComponentTest ext
));
//bunch of docs that are variants on blue
assertU(adoc("id", "4",
+ "test_posoffpaytv", "blue",
"test_posofftv", "blue",
"test_basictv", "blue",
"test_notv", "blue",
@@ -78,6 +77,7 @@ public class TermVectorComponentTest ext
"test_offtv", "blue"
));
assertU(adoc("id", "5",
+ "test_posoffpaytv", "blud",
"test_posofftv", "blud",
"test_basictv", "blud",
"test_notv", "blud",
@@ -85,6 +85,7 @@ public class TermVectorComponentTest ext
"test_offtv", "blud"
));
assertU(adoc("id", "6",
+ "test_posoffpaytv", "boue",
"test_posofftv", "boue",
"test_basictv", "boue",
"test_notv", "boue",
@@ -92,6 +93,7 @@ public class TermVectorComponentTest ext
"test_offtv", "boue"
));
assertU(adoc("id", "7",
+ "test_posoffpaytv", "glue",
"test_posofftv", "glue",
"test_basictv", "glue",
"test_notv", "glue",
@@ -99,6 +101,7 @@ public class TermVectorComponentTest ext
"test_offtv", "glue"
));
assertU(adoc("id", "8",
+ "test_posoffpaytv", "blee",
"test_posofftv", "blee",
"test_basictv", "blee",
"test_notv", "blee",
@@ -106,6 +109,7 @@ public class TermVectorComponentTest ext
"test_offtv", "blee"
));
assertU(adoc("id", "9",
+ "test_posoffpaytv", "blah",
"test_posofftv", "blah",
"test_basictv", "blah",
"test_notv", "blah",
@@ -125,6 +129,7 @@ public class TermVectorComponentTest ext
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
+ " 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
);
@@ -166,6 +171,7 @@ public class TermVectorComponentTest ext
" 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
+ " 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
" 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
" 'uniqueKeyFieldName':'id'}"
);
@@ -241,7 +247,8 @@ public class TermVectorComponentTest ext
public void testPerField() throws Exception {
assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
,TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true"
- ,TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv"
+ ,TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv,test_posoffpaytv"
+ ,"f.test_posoffpaytv." + TermVectorParams.PAYLOADS, "false"
,"f.test_posofftv." + TermVectorParams.POSITIONS, "false"
,"f.test_offtv." + TermVectorParams.OFFSETS, "false"
,"f.test_basictv." + TermVectorParams.DF, "false"
@@ -255,6 +262,17 @@ public class TermVectorComponentTest ext
);
}
+ @Test
+ public void testPayloads() throws Exception {
+ // The test_posoffpaytv field uses TokenOffsetPayloadTokenFilter, which
+ // stores each token's start (20) and end (27) offsets in its payload;
+ // the expected base64 'AAAAFAAAABs=' decodes to those two 4-byte ints:
+ assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
+ , TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true",
+ TermVectorParams.PAYLOADS, "true")
+ ,"/termVectors/0/test_posoffpaytv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'payloads':{'payload': 'AAAAFAAAABs='}, 'df':2, 'tf-idf':0.5}"
+ );
+ }
}
Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java Fri Mar 27 02:10:00 2015
@@ -24,7 +24,7 @@ public class TestFieldResource extends S
public void testGetField() throws Exception {
assertQ("/schema/fields/test_postv?indent=on&wt=xml&showDefaults=true",
"count(/response/lst[@name='field']) = 1",
- "count(/response/lst[@name='field']/*) = 15",
+ "count(/response/lst[@name='field']/*) = 16",
"/response/lst[@name='field']/str[@name='name'] = 'test_postv'",
"/response/lst[@name='field']/str[@name='type'] = 'text'",
"/response/lst[@name='field']/bool[@name='indexed'] = 'true'",
@@ -32,6 +32,7 @@ public class TestFieldResource extends S
"/response/lst[@name='field']/bool[@name='docValues'] = 'false'",
"/response/lst[@name='field']/bool[@name='termVectors'] = 'true'",
"/response/lst[@name='field']/bool[@name='termPositions'] = 'true'",
+ "/response/lst[@name='field']/bool[@name='termPayloads'] = 'false'",
"/response/lst[@name='field']/bool[@name='termOffsets'] = 'false'",
"/response/lst[@name='field']/bool[@name='omitNorms'] = 'false'",
"/response/lst[@name='field']/bool[@name='omitTermFreqAndPositions'] = 'false'",
@@ -61,6 +62,7 @@ public class TestFieldResource extends S
"/field/termVectors==true",
"/field/termPositions==true",
"/field/termOffsets==false",
+ "/field/termPayloads==false",
"/field/omitNorms==false",
"/field/omitTermFreqAndPositions==false",
"/field/omitPositions==false",
Modified: lucene/dev/branches/branch_5x/solr/server/solr/configsets/sample_techproducts_configs/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/server/solr/configsets/sample_techproducts_configs/conf/schema.xml?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/server/solr/configsets/sample_techproducts_configs/conf/schema.xml (original)
+++ lucene/dev/branches/branch_5x/solr/server/solr/configsets/sample_techproducts_configs/conf/schema.xml Fri Mar 27 02:10:00 2015
@@ -93,6 +93,8 @@
This will increase storage costs.
termOffsets: Store offset information with the term vector. This
will increase storage costs.
+ termPayloads: Store payload information with the term vector. This
+ will increase storage costs.
required: The field is required. It will throw an error if the
value does not exist
default: a value that should be used if no value is specified
Modified: lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/luke/FieldFlag.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/luke/FieldFlag.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/luke/FieldFlag.java (original)
+++ lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/luke/FieldFlag.java Fri Mar 27 02:10:00 2015
@@ -30,6 +30,7 @@ public enum FieldFlag {
TERM_VECTOR_STORED('V', "TermVector Stored"),
TERM_VECTOR_OFFSET('o', "Store Offset With TermVector"),
TERM_VECTOR_POSITION('p', "Store Position With TermVector"),
+ TERM_VECTOR_PAYLOADS('y', "Store Payloads With TermVector"),
OMIT_NORMS('O', "Omit Norms"),
OMIT_TF('F', "Omit Term Frequencies & Positions"),
OMIT_POSITIONS('P', "Omit Positions"),
Modified: lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/TermVectorParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/TermVectorParams.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/TermVectorParams.java (original)
+++ lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/TermVectorParams.java Fri Mar 27 02:10:00 2015
@@ -35,6 +35,11 @@ public interface TermVectorParams {
* */
public static final String POSITIONS = TV_PREFIX + "positions";
/**
+ * Return term vector payload information
+ *
+ * */
+ public static final String PAYLOADS = TV_PREFIX + "payloads";
+ /**
* Return offset information, if available
* */
public static final String OFFSETS = TV_PREFIX + "offsets";