You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ds...@apache.org on 2015/03/27 03:10:01 UTC

svn commit: r1669494 - in /lucene/dev/branches/branch_5x: ./ solr/ solr/core/ solr/core/src/java/org/apache/solr/handler/admin/ solr/core/src/java/org/apache/solr/handler/component/ solr/core/src/java/org/apache/solr/schema/ solr/core/src/test-files/so...

Author: dsmiley
Date: Fri Mar 27 02:10:00 2015
New Revision: 1669494

URL: http://svn.apache.org/r1669494
Log:
SOLR-5911: Add term vector payload support to the schema,
TermVectorComponent, and LukeRequestHandler

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/EnumField.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldType.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/SchemaField.java
    lucene/dev/branches/branch_5x/solr/core/src/test-files/solr/collection1/conf/schema.xml
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java
    lucene/dev/branches/branch_5x/solr/server/   (props changed)
    lucene/dev/branches/branch_5x/solr/server/solr/configsets/sample_techproducts_configs/conf/schema.xml
    lucene/dev/branches/branch_5x/solr/solrj/   (props changed)
    lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/luke/FieldFlag.java
    lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/TermVectorParams.java

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Fri Mar 27 02:10:00 2015
@@ -178,6 +178,10 @@ New Features
 
 * SOLR-7240: '/' redirects to '/solr/' for convinience (Martijn Koster, hossman)
 
+* SOLR-5911: Added payload support for term vectors. New "termPayloads" option for fields
+  / types in the schema, and "tv.payloads" param for the term vector component.
+  (Mike McCandless, David Smiley)
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java Fri Mar 27 02:10:00 2015
@@ -20,7 +20,16 @@ package org.apache.solr.handler.admin;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.util.CharFilterFactory;
@@ -28,9 +37,20 @@ import org.apache.lucene.analysis.util.T
 import org.apache.lucene.analysis.util.TokenizerFactory;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.index.*;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SegmentReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.Directory;
@@ -49,18 +69,18 @@ import org.apache.solr.common.util.Simpl
 import org.apache.solr.handler.RequestHandlerBase;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.schema.CopyField;
 import org.apache.solr.schema.FieldType;
-import org.apache.solr.update.SolrIndexWriter;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
-import org.apache.solr.schema.CopyField;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.update.SolrIndexWriter;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
-import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS;
 import static org.apache.lucene.index.IndexOptions.DOCS;
+import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS;
+import static org.apache.lucene.index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
 
 /**
  * This handler exposes the internal lucene index.  It is inspired by and 
@@ -186,6 +206,7 @@ public class LukeRequestHandler extends
     flags.append( (f != null && f.fieldType().storeTermVectors())            ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
     flags.append( (f != null && f.fieldType().storeTermVectorOffsets())   ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
     flags.append( (f != null && f.fieldType().storeTermVectorPositions()) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
+    flags.append( (f != null && f.fieldType().storeTermVectorPayloads())   ? FieldFlag.TERM_VECTOR_PAYLOADS.getAbbreviation() : '-' );
     flags.append( (f != null && f.fieldType().omitNorms())                  ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
 
     flags.append( (f != null && DOCS == opts ) ?
@@ -224,6 +245,7 @@ public class LukeRequestHandler extends
     flags.append( (f != null && f.storeTermVector() )    ? FieldFlag.TERM_VECTOR_STORED.getAbbreviation() : '-' );
     flags.append( (f != null && f.storeTermOffsets() )   ? FieldFlag.TERM_VECTOR_OFFSET.getAbbreviation() : '-' );
     flags.append( (f != null && f.storeTermPositions() ) ? FieldFlag.TERM_VECTOR_POSITION.getAbbreviation() : '-' );
+    flags.append( (f != null && f.storeTermPayloads() )  ? FieldFlag.TERM_VECTOR_PAYLOADS.getAbbreviation() : '-' );
     flags.append( (f != null && f.omitNorms())           ? FieldFlag.OMIT_NORMS.getAbbreviation() : '-' );
     flags.append( (f != null &&
         f.omitTermFreqAndPositions() )        ? FieldFlag.OMIT_TF.getAbbreviation() : '-' );

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java Fri Mar 27 02:10:00 2015
@@ -11,10 +11,10 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
@@ -24,6 +24,7 @@ import org.apache.solr.common.SolrExcept
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.params.TermVectorParams;
+import org.apache.solr.common.util.Base64;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.IndexSchema;
@@ -58,7 +59,7 @@ import org.apache.solr.util.plugin.SolrC
  * Return term vectors for the documents in a query result set.
  * <p>
  * Info available:
- * term, frequency, position, offset, IDF.
+ * term, frequency, position, offset, payloads, IDF.
  * <p>
  * <b>Note</b> Returning IDF can be expensive.
  * 
@@ -153,6 +154,7 @@ public class TermVectorComponent extends
     allFields.termFreq = params.getBool(TermVectorParams.TF, false);
     allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
     allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
+    allFields.payloads = params.getBool(TermVectorParams.PAYLOADS, false);
     allFields.docFreq = params.getBool(TermVectorParams.DF, false);
     allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
     //boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);
@@ -161,6 +163,7 @@ public class TermVectorComponent extends
       allFields.termFreq = true;
       allFields.positions = true;
       allFields.offsets = true;
+      allFields.payloads = true;
       allFields.docFreq = true;
       allFields.tfIdf = true;
     }
@@ -171,6 +174,7 @@ public class TermVectorComponent extends
     List<String>  noTV = new ArrayList<>();
     List<String>  noPos = new ArrayList<>();
     List<String>  noOff = new ArrayList<>();
+    List<String>  noPay = new ArrayList<>();
 
     Set<String> fields = getFields(rb);
     if ( null != fields ) {
@@ -207,6 +211,10 @@ public class TermVectorComponent extends
             if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey){
               noOff.add(field);
             }
+            option.payloads = params.getFieldBool(field, TermVectorParams.PAYLOADS, allFields.payloads);
+            if (option.payloads && !sf.storeTermPayloads() && !fieldIsUniqueKey){
+              noPay.add(field);
+            }
           } else {//field doesn't have term vectors
             if (!fieldIsUniqueKey) noTV.add(field);
           }
@@ -234,6 +242,10 @@ public class TermVectorComponent extends
       warnings.add("noOffsets", noOff);
       hasWarnings = true;
     }
+    if (!noPay.isEmpty()) {
+      warnings.add("noPayloads", noPay);
+      hasWarnings = true;
+    }
     if (hasWarnings) {
       termVectors.add("warnings", warnings);
     }
@@ -341,22 +353,27 @@ public class TermVectorComponent extends
         termInfo.add("tf", freq);
       }
 
-      dpEnum = termsEnum.postings(null, dpEnum, PostingsEnum.ALL);
-      boolean useOffsets = false;
-      boolean usePositions = false;
+      int dpEnumFlags = 0;
+      dpEnumFlags |= fieldOptions.positions ? PostingsEnum.POSITIONS : 0;
+      //payloads require offsets
+      dpEnumFlags |= (fieldOptions.offsets || fieldOptions.payloads) ? PostingsEnum.OFFSETS : 0;
+      dpEnumFlags |= fieldOptions.payloads ? PostingsEnum.PAYLOADS : 0;
+      dpEnum = termsEnum.postings(null, dpEnum, dpEnumFlags);
+
+      boolean atNextDoc = false;
       if (dpEnum != null) {
         dpEnum.nextDoc();
-        usePositions = fieldOptions.positions;
-        useOffsets = fieldOptions.offsets;
+        atNextDoc = true;
       }
 
-      NamedList<Integer> positionsNL = null;
-      NamedList<Number> theOffsets = null;
+      if (atNextDoc && dpEnumFlags != 0) {
+        NamedList<Integer> positionsNL = null;
+        NamedList<Number> theOffsets = null;
+        NamedList<String> thePayloads = null;
 
-      if (usePositions || useOffsets) {
         for (int i = 0; i < freq; i++) {
           final int pos = dpEnum.nextPosition();
-          if (usePositions && pos >= 0) {
+          if (fieldOptions.positions && pos >= 0) {
             if (positionsNL == null) {
               positionsNL = new NamedList<>();
               termInfo.add("positions", positionsNL);
@@ -364,19 +381,24 @@ public class TermVectorComponent extends
             positionsNL.add("position", pos);
           }
 
-          if (useOffsets && theOffsets == null) {
-            if (dpEnum.startOffset() == -1) {
-              useOffsets = false;
-            } else {
+          int startOffset = fieldOptions.offsets ? dpEnum.startOffset() : -1;
+          if (startOffset >= 0) {
+            if (theOffsets == null) {
               theOffsets = new NamedList<>();
               termInfo.add("offsets", theOffsets);
             }
-          }
-
-          if (theOffsets != null) {
             theOffsets.add("start", dpEnum.startOffset());
             theOffsets.add("end", dpEnum.endOffset());
           }
+
+          BytesRef payload = fieldOptions.payloads ? dpEnum.getPayload() : null;
+          if (payload != null) {
+            if (thePayloads == null) {
+              thePayloads = new NamedList<>();
+              termInfo.add("payloads", thePayloads);
+            }
+            thePayloads.add("payload", Base64.byteArrayToBase64(payload.bytes, payload.offset, payload.length));
+          }
         }
       }
       
@@ -472,5 +494,5 @@ public class TermVectorComponent extends
 
 class FieldOptions {
   String fieldName;
-  boolean termFreq, positions, offsets, docFreq, tfIdf;
+  boolean termFreq, positions, offsets, payloads, docFreq, tfIdf;
 }

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/EnumField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/EnumField.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/EnumField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/EnumField.java Fri Mar 27 02:10:00 2015
@@ -17,6 +17,21 @@ package org.apache.solr.schema;
  * limitations under the License.
  */
 
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedSetDocValuesField;
@@ -24,7 +39,11 @@ import org.apache.lucene.index.IndexOpti
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.valuesource.EnumFieldSource;
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.DocValuesRangeQuery;
+import org.apache.lucene.search.NumericRangeQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SortField;
 import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
@@ -42,17 +61,6 @@ import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
 
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-import javax.xml.xpath.XPath;
-import javax.xml.xpath.XPathConstants;
-import javax.xml.xpath.XPathExpressionException;
-import javax.xml.xpath.XPathFactory;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.*;
-
 /***
  * Field type for support of string values with custom sort order.
  */
@@ -388,6 +396,7 @@ public class EnumField extends Primitive
     newType.setStoreTermVectors(field.storeTermVector());
     newType.setStoreTermVectorOffsets(field.storeTermOffsets());
     newType.setStoreTermVectorPositions(field.storeTermPositions());
+    newType.setStoreTermVectorPayloads(field.storeTermPayloads());
     newType.setNumericType(FieldType.NumericType.INT);
     newType.setNumericPrecisionStep(DEFAULT_PRECISION_STEP);
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldProperties.java Fri Mar 27 02:10:00 2015
@@ -17,8 +17,8 @@
 
 package org.apache.solr.schema;
 
-import java.util.Map;
 import java.util.HashMap;
+import java.util.Map;
 
 /**
  *
@@ -52,13 +52,15 @@ public abstract class FieldProperties {
   protected final static int STORE_OFFSETS       = 0x00004000;
   protected final static int DOC_VALUES          = 0x00008000;
 
+  protected final static int STORE_TERMPAYLOADS  = 0x00010000;
+
   static final String[] propertyNames = {
           "indexed", "tokenized", "stored",
           "binary", "omitNorms", "omitTermFreqAndPositions",
           "termVectors", "termPositions", "termOffsets",
           "multiValued",
           "sortMissingFirst","sortMissingLast","required", "omitPositions",
-          "storeOffsetsWithPositions", "docValues"
+          "storeOffsetsWithPositions", "docValues", "termPayloads"
   };
 
   static final Map<String,Integer> propertyMap = new HashMap<>();

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldType.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldType.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/FieldType.java Fri Mar 27 02:10:00 2015
@@ -17,8 +17,6 @@
 
 package org.apache.solr.schema;
 
-import static org.apache.lucene.analysis.util.AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -67,6 +65,8 @@ import org.apache.solr.search.Sorting;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static org.apache.lucene.analysis.util.AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM;
+
 /**
  * Base class for all field types used by an index schema.
  *
@@ -268,6 +268,7 @@ public abstract class FieldType extends
     newType.setStoreTermVectors(field.storeTermVector());
     newType.setStoreTermVectorOffsets(field.storeTermOffsets());
     newType.setStoreTermVectorPositions(field.storeTermPositions());
+    newType.setStoreTermVectorPayloads(field.storeTermPayloads());
 
     return createField(field.getName(), val, newType, boost);
   }

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java Fri Mar 27 02:10:00 2015
@@ -33,7 +33,6 @@ import org.apache.lucene.index.Indexable
 import org.apache.lucene.queries.function.ValueSource;
 import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
 import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.SortedSetSortField;
 import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeSource;
@@ -178,6 +177,7 @@ public class PreAnalyzedField extends Fi
     newType.setStoreTermVectors(field.storeTermVector());
     newType.setStoreTermVectorOffsets(field.storeTermOffsets());
     newType.setStoreTermVectorPositions(field.storeTermPositions());
+    newType.setStoreTermVectorPayloads(field.storeTermPayloads());
     return newType;
   }
   

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/SchemaField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/SchemaField.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/SchemaField.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/schema/SchemaField.java Fri Mar 27 02:10:00 2015
@@ -17,19 +17,18 @@
 
 package org.apache.solr.schema;
 
-import org.apache.solr.common.SolrException;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.search.SortField;
-import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.search.QParser;
-
-import org.apache.solr.response.TextResponseWriter;
-
+import java.io.IOException;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.io.IOException;
+
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.search.SortField;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.response.TextResponseWriter;
+import org.apache.solr.search.QParser;
 
 /**
  * Encapsulates all information about a Field in a Solr Schema
@@ -94,6 +93,7 @@ public final class SchemaField extends F
   public boolean storeTermVector() { return (properties & STORE_TERMVECTORS)!=0; }
   public boolean storeTermPositions() { return (properties & STORE_TERMPOSITIONS)!=0; }
   public boolean storeTermOffsets() { return (properties & STORE_TERMOFFSETS)!=0; }
+  public boolean storeTermPayloads() { return (properties & STORE_TERMPAYLOADS)!=0; }
   public boolean omitNorms() { return (properties & OMIT_NORMS)!=0; }
 
   public boolean omitTermFreqAndPositions() { return (properties & OMIT_TF_POSITIONS)!=0; }
@@ -236,7 +236,7 @@ public final class SchemaField extends F
 
     if (on(falseProps,INDEXED)) {
       int pp = (INDEXED 
-              | STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
+              | STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS | STORE_TERMPAYLOADS);
       if (on(pp,trueProps)) {
         throw new RuntimeException("SchemaField: " + name + " conflicting 'true' field options for non-indexed field:" + props);
       }
@@ -269,7 +269,7 @@ public final class SchemaField extends F
     }
 
     if (on(falseProps,STORE_TERMVECTORS)) {
-      int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
+      int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS | STORE_TERMPAYLOADS);
       if (on(pp,trueProps)) {
         throw new RuntimeException("SchemaField: " + name + " conflicting termvector field options:" + props);
       }
@@ -323,6 +323,7 @@ public final class SchemaField extends F
       properties.add(getPropertyName(STORE_TERMVECTORS), storeTermVector());
       properties.add(getPropertyName(STORE_TERMPOSITIONS), storeTermPositions());
       properties.add(getPropertyName(STORE_TERMOFFSETS), storeTermOffsets());
+      properties.add(getPropertyName(STORE_TERMPAYLOADS), storeTermPayloads());
       properties.add(getPropertyName(OMIT_NORMS), omitNorms());
       properties.add(getPropertyName(OMIT_TF_POSITIONS), omitTermFreqAndPositions());
       properties.add(getPropertyName(OMIT_POSITIONS), omitPositions());

Modified: lucene/dev/branches/branch_5x/solr/core/src/test-files/solr/collection1/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test-files/solr/collection1/conf/schema.xml?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test-files/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test-files/solr/collection1/conf/schema.xml Fri Mar 27 02:10:00 2015
@@ -103,6 +103,16 @@
       </analyzer>
     </fieldType>
 
+    <fieldtype name="text_payload_tv" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StandardFilterFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory"/>
+        <filter class="solr.PorterStemFilterFactory"/>
+        <filter class="org.apache.lucene.analysis.payloads.TokenOffsetPayloadTokenFilterFactory"/>
+      </analyzer>
+    </fieldtype>
 
     <fieldType name="nametext" class="solr.TextField">
       <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
@@ -478,6 +488,8 @@
    <field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
    <field name="test_posofftv" type="text" termVectors="true"
      termPositions="true" termOffsets="true"/>
+   <field name="test_posoffpaytv" type="text_payload_tv" termVectors="true" 
+     termPositions="true" termOffsets="true" termPayloads="true"/>
 
    <!-- test highlit field settings -->
    <field name="test_hlt" type="highlittext" indexed="true"/>

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java Fri Mar 27 02:10:00 2015
@@ -17,6 +17,8 @@
 
 package org.apache.solr;
 
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.StringWriter;
@@ -26,18 +28,10 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.LazyDocument;
-import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext.Context;
-import org.apache.lucene.store.MockDirectoryWrapper;
-import org.apache.lucene.util.English;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.MapSolrParams;
@@ -56,10 +50,7 @@ import org.apache.solr.schema.IndexSchem
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocList;
-import org.apache.solr.update.DirectUpdateHandler2;
-import org.apache.solr.util.RefCounted;
 import org.junit.BeforeClass;
-import org.junit.Ignore;
 import org.junit.Test;
 
 /**
@@ -540,6 +531,11 @@ public class BasicFunctionalityTest exte
     assertTrue(f.storeTermVector() && f.storeTermPositions() && f.storeTermOffsets());
     assertTrue(luf.fieldType().storeTermVectorOffsets() && luf.fieldType().storeTermVectorPositions());
 
+    f = ischema.getField("test_posoffpaytv");
+    luf = f.createField("test", 0f);
+    assertTrue(f.storeTermVector() && f.storeTermPositions() && f.storeTermOffsets() && f.storeTermPayloads());
+    assertTrue(luf.fieldType().storeTermVectorOffsets() && luf.fieldType().storeTermVectorPositions() && luf.fieldType().storeTermVectorPayloads());
+
   }
 
   @Test

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/handler/component/TermVectorComponentTest.java Fri Mar 27 02:10:00 2015
@@ -1,19 +1,13 @@
 package org.apache.solr.handler.component;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
 import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.TermVectorParams;
-import org.apache.solr.request.LocalSolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
 import org.junit.BeforeClass;
 import org.junit.Test;
-
-import java.util.HashMap;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -42,6 +36,7 @@ public class TermVectorComponentTest ext
     initCore("solrconfig.xml","schema.xml");
 
     assertU(adoc("id", "0",
+            "test_posoffpaytv", "This is a title and another title",
             "test_posofftv", "This is a title and another title",
             "test_basictv", "This is a title and another title",
             "test_notv", "This is a title and another title",
@@ -49,6 +44,7 @@ public class TermVectorComponentTest ext
             "test_offtv", "This is a title and another title"
     ));
     assertU(adoc("id", "1",
+            "test_posoffpaytv", "The quick reb fox jumped over the lazy brown dogs.",
             "test_posofftv", "The quick reb fox jumped over the lazy brown dogs.",
             "test_basictv", "The quick reb fox jumped over the lazy brown dogs.",
             "test_notv", "The quick reb fox jumped over the lazy brown dogs.",
@@ -56,6 +52,7 @@ public class TermVectorComponentTest ext
             "test_offtv", "The quick reb fox jumped over the lazy brown dogs."
     ));
     assertU(adoc("id", "2",
+            "test_posoffpaytv", "This is a document",
             "test_posofftv", "This is a document",
             "test_basictv", "This is a document",
             "test_notv", "This is a document",
@@ -63,6 +60,7 @@ public class TermVectorComponentTest ext
             "test_offtv", "This is a document"
     ));
     assertU(adoc("id", "3",
+            "test_posoffpaytv", "another document",
             "test_posofftv", "another document",
             "test_basictv", "another document",
             "test_notv", "another document",
@@ -71,6 +69,7 @@ public class TermVectorComponentTest ext
     ));
     //bunch of docs that are variants on blue
     assertU(adoc("id", "4",
+            "test_posoffpaytv", "blue",
             "test_posofftv", "blue",
             "test_basictv", "blue",
             "test_notv", "blue",
@@ -78,6 +77,7 @@ public class TermVectorComponentTest ext
             "test_offtv", "blue"
     ));
     assertU(adoc("id", "5",
+            "test_posoffpaytv", "blud",
             "test_posofftv", "blud",
             "test_basictv", "blud",
             "test_notv", "blud",
@@ -85,6 +85,7 @@ public class TermVectorComponentTest ext
             "test_offtv", "blud"
     ));
     assertU(adoc("id", "6",
+            "test_posoffpaytv", "boue",
             "test_posofftv", "boue",
             "test_basictv", "boue",
             "test_notv", "boue",
@@ -92,6 +93,7 @@ public class TermVectorComponentTest ext
             "test_offtv", "boue"
     ));
     assertU(adoc("id", "7",
+            "test_posoffpaytv", "glue",
             "test_posofftv", "glue",
             "test_basictv", "glue",
             "test_notv", "glue",
@@ -99,6 +101,7 @@ public class TermVectorComponentTest ext
             "test_offtv", "glue"
     ));
     assertU(adoc("id", "8",
+            "test_posoffpaytv", "blee",
             "test_posofftv", "blee",
             "test_basictv", "blee",
             "test_notv", "blee",
@@ -106,6 +109,7 @@ public class TermVectorComponentTest ext
             "test_offtv", "blee"
     ));
     assertU(adoc("id", "9",
+            "test_posoffpaytv", "blah",
             "test_posofftv", "blah",
             "test_basictv", "blah",
             "test_notv", "blah",
@@ -125,6 +129,7 @@ public class TermVectorComponentTest ext
             " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
             " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
             " 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
+            " 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
             " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
             " 'uniqueKeyFieldName':'id'}"
     );
@@ -166,6 +171,7 @@ public class TermVectorComponentTest ext
             " 'test_basictv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
             " 'test_offtv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
             " 'test_posofftv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
+            " 'test_posoffpaytv':{'anoth':{'tf':1},'titl':{'tf':2}}," +
             " 'test_postv':{'anoth':{'tf':1},'titl':{'tf':2}}}," +
             " 'uniqueKeyFieldName':'id'}"
     );
@@ -241,7 +247,8 @@ public class TermVectorComponentTest ext
   public void testPerField() throws Exception {
     assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
         ,TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true"
-        ,TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv"
+        ,TermVectorParams.FIELDS, "test_basictv,test_notv,test_postv,test_offtv,test_posofftv,test_posoffpaytv"
+        ,"f.test_posoffpaytv." + TermVectorParams.PAYLOADS, "false"
         ,"f.test_posofftv." + TermVectorParams.POSITIONS, "false"
         ,"f.test_offtv." + TermVectorParams.OFFSETS, "false"
         ,"f.test_basictv." + TermVectorParams.DF, "false"
@@ -255,6 +262,17 @@ public class TermVectorComponentTest ext
     );
   }
 
+  @Test
+  public void testPayloads() throws Exception {
+    // This field uses TokenOffsetPayloadTokenFilter, which stuffs each
+    // token's start offset (20) and end offset (27) into the payload; the
+    // expected value below is the Base64 encoding of those two 4-byte ints:
+    assertJQ(req("json.nl","map", "qt",tv, "q", "id:0", TermVectorComponent.COMPONENT_NAME, "true"
+                 , TermVectorParams.TF, "true", TermVectorParams.DF, "true", TermVectorParams.OFFSETS, "true", TermVectorParams.POSITIONS, "true", TermVectorParams.TF_IDF, "true",
+                 TermVectorParams.PAYLOADS, "true")
+       ,"/termVectors/0/test_posoffpaytv/anoth=={'tf':1, 'offsets':{'start':20, 'end':27}, 'positions':{'position':5}, 'payloads':{'payload': 'AAAAFAAAABs='}, 'df':2, 'tf-idf':0.5}"
+    );
+  }
 }
 
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/rest/schema/TestFieldResource.java Fri Mar 27 02:10:00 2015
@@ -24,7 +24,7 @@ public class TestFieldResource extends S
   public void testGetField() throws Exception {
     assertQ("/schema/fields/test_postv?indent=on&wt=xml&showDefaults=true",
             "count(/response/lst[@name='field']) = 1",
-            "count(/response/lst[@name='field']/*) = 15",
+            "count(/response/lst[@name='field']/*) = 16",
             "/response/lst[@name='field']/str[@name='name'] = 'test_postv'",
             "/response/lst[@name='field']/str[@name='type'] = 'text'",
             "/response/lst[@name='field']/bool[@name='indexed'] = 'true'",
@@ -32,6 +32,7 @@ public class TestFieldResource extends S
             "/response/lst[@name='field']/bool[@name='docValues'] = 'false'",
             "/response/lst[@name='field']/bool[@name='termVectors'] = 'true'",
             "/response/lst[@name='field']/bool[@name='termPositions'] = 'true'",
+            "/response/lst[@name='field']/bool[@name='termPayloads'] = 'false'",
             "/response/lst[@name='field']/bool[@name='termOffsets'] = 'false'",
             "/response/lst[@name='field']/bool[@name='omitNorms'] = 'false'",
             "/response/lst[@name='field']/bool[@name='omitTermFreqAndPositions'] = 'false'",
@@ -61,6 +62,7 @@ public class TestFieldResource extends S
              "/field/termVectors==true",
              "/field/termPositions==true",
              "/field/termOffsets==false",
+             "/field/termPayloads==false",
              "/field/omitNorms==false",
              "/field/omitTermFreqAndPositions==false",
              "/field/omitPositions==false",

Modified: lucene/dev/branches/branch_5x/solr/server/solr/configsets/sample_techproducts_configs/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/server/solr/configsets/sample_techproducts_configs/conf/schema.xml?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/server/solr/configsets/sample_techproducts_configs/conf/schema.xml (original)
+++ lucene/dev/branches/branch_5x/solr/server/solr/configsets/sample_techproducts_configs/conf/schema.xml Fri Mar 27 02:10:00 2015
@@ -93,6 +93,8 @@
        This will increase storage costs.
      termOffsets: Store offset information with the term vector. This 
        will increase storage costs.
+     termPayloads: Store payload information with the term vector. This
+       will increase storage costs.
      required: The field is required.  It will throw an error if the
        value does not exist
      default: a value that should be used if no value is specified

Modified: lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/luke/FieldFlag.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/luke/FieldFlag.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/luke/FieldFlag.java (original)
+++ lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/luke/FieldFlag.java Fri Mar 27 02:10:00 2015
@@ -30,6 +30,7 @@ public enum FieldFlag {
   TERM_VECTOR_STORED('V', "TermVector Stored"), 
   TERM_VECTOR_OFFSET('o', "Store Offset With TermVector"),
   TERM_VECTOR_POSITION('p', "Store Position With TermVector"),
+  TERM_VECTOR_PAYLOADS('y', "Store Payloads With TermVector"),
   OMIT_NORMS('O', "Omit Norms"), 
   OMIT_TF('F', "Omit Term Frequencies & Positions"), 
   OMIT_POSITIONS('P', "Omit Positions"),

Modified: lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/TermVectorParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/TermVectorParams.java?rev=1669494&r1=1669493&r2=1669494&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/TermVectorParams.java (original)
+++ lucene/dev/branches/branch_5x/solr/solrj/src/java/org/apache/solr/common/params/TermVectorParams.java Fri Mar 27 02:10:00 2015
@@ -35,6 +35,11 @@ public interface TermVectorParams {
   * */
   public static final String POSITIONS = TV_PREFIX + "positions";
   /**
+  * Return term vector payload information
+  *
+  * */
+  public static final String PAYLOADS = TV_PREFIX + "payloads";
+  /**
   * Return offset information, if available
   * */
   public static final String OFFSETS = TV_PREFIX + "offsets";