You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/05/20 20:11:32 UTC
svn commit: r1680645 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/suggest/ lucene/suggest/src/java/org/apache/lucene/search/suggest/ lucene/suggest/src/test/org/apache/lucene/search/suggest/

Author: mikemccand
Date: Wed May 20 18:11:31 2015
New Revision: 1680645

URL: http://svn.apache.org/r1680645
Log:
LUCENE-6486: make payloads optional in DocumentDictionary

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
    lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java
    lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
    lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1680645&r1=1680644&r2=1680645&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Wed May 20 18:11:31 2015
@@ -149,6 +149,10 @@ Bug Fixes
 * LUCENE-6483: Ensure core closed listeners are called on the same cache key as
   the reader which has been used to register the listener. (Adrien Grand)
 
+* LUCENE-6486: DocumentDictionary iterator no longer skips
+  documents with no payloads and now returns an empty BytesRef instead
+  (Marius Grama via Michael McCandless)
+
 API Changes
 
 * LUCENE-6377: SearcherFactory#newSearcher now accepts the previous reader

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java?rev=1680645&r1=1680644&r2=1680645&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java Wed May 20 18:11:31 2015
@@ -18,6 +18,7 @@ package org.apache.lucene.search.suggest
  */
 
 import java.io.IOException;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.Set;
 
@@ -41,17 +42,14 @@ import org.apache.lucene.util.BytesRef;
  * <b>NOTE:</b> 
  *  <ul>
  *    <li>
- *      The term and (optionally) payload fields have to be
- *      stored
+ *      The term field has to be stored; if it is missing, the document is skipped.
  *    </li>
  *    <li>
- *      The weight field can be stored or can be a {@link NumericDocValues}.
- *      If the weight field is not defined, the value of the weight is <code>0</code>
+ *      The payload and contexts fields are optional and are not required to be stored.
  *    </li>
  *    <li>
- *      if any of the term or (optionally) payload fields supplied
- *      do not have a value for a document, then the document is 
- *      skipped by the dictionary
+ *      The weight field can be stored or can be a {@link NumericDocValues}.
+ *      If the weight field is not defined, the value of the weight is <code>0</code>
  *    </li>
  *  </ul>
  */
@@ -90,7 +88,7 @@ public class DocumentDictionary implemen
    * Creates a new dictionary with the contents of the fields named <code>field</code>
    * for the terms, <code>weightField</code> for the weights that will be used for the 
    * the corresponding terms, <code>payloadField</code> for the corresponding payloads
-   * for the entry and <code>contextsFeild</code> for associated contexts.
+   * for the entry and <code>contextsField</code> for associated contexts.
    */
   public DocumentDictionary(IndexReader reader, String field, String weightField, String payloadField, String contextsField) {
     this.reader = reader;
@@ -167,25 +165,26 @@ public class DocumentDictionary implemen
 
         Document doc = reader.document(currentDocId, relevantFields);
 
-        Set<BytesRef> tempContexts = new HashSet<>();
-
-        BytesRef tempPayload;
+        BytesRef tempPayload = null;
         if (hasPayloads) {
           IndexableField payload = doc.getField(payloadField);
-          if (payload == null) {
-            continue;
-          } else if (payload.binaryValue() != null) {
-            tempPayload =  payload.binaryValue();
-          } else if (payload.stringValue() != null) {
-            tempPayload = new BytesRef(payload.stringValue());
-          } else {
-            continue;
+          if (payload != null) {
+            if (payload.binaryValue() != null) {
+              tempPayload =  payload.binaryValue();
+            } else if (payload.stringValue() != null) {
+              tempPayload = new BytesRef(payload.stringValue());
+            }
+          }
+          // when payloads are configured but this document has none, use an empty
+          // BytesRef instead of null for the payload
+          if (tempPayload == null) {
+            tempPayload = new BytesRef();
           }
-        } else {
-          tempPayload = null;
         }
 
+        Set<BytesRef> tempContexts;
         if (hasContexts) {
+          tempContexts = new HashSet<>();
           final IndexableField[] contextFields = doc.getFields(contextsField);
           for (IndexableField contextField : contextFields) {
             if (contextField.binaryValue() != null) {
@@ -196,6 +195,8 @@ public class DocumentDictionary implemen
               continue;
             }
           }
+        } else {
+          tempContexts = Collections.emptySet();
         }
 
         currentDocFields = doc.getFields(field);

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java?rev=1680645&r1=1680644&r2=1680645&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java Wed May 20 18:11:31 2015
@@ -31,7 +31,8 @@ import org.apache.lucene.queries.functio
 
 /**
  * <p>
- * Dictionary with terms and optionally payload information 
+ * Dictionary with terms and optionally payload and
+ * optionally contexts information
  * taken from stored fields in a Lucene index. Similar to 
  * {@link DocumentDictionary}, except it obtains the weight
  * of the terms in a document based on a {@link ValueSource}.
@@ -39,13 +40,10 @@ import org.apache.lucene.queries.functio
  * <b>NOTE:</b> 
  *  <ul>
  *    <li>
- *      The term and (optionally) payload fields have to be
- *      stored
+ *      The term field has to be stored; if it is missing, the document is skipped.
  *    </li>
  *    <li>
- *      if the term or (optionally) payload fields supplied
- *      do not have a value for a document, then the document is 
- *      rejected by the dictionary
+ *      The payload and contexts fields are optional and are not required to be stored.
  *    </li>
  *  </ul>
  *  <p>

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java?rev=1680645&r1=1680644&r2=1680645&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java Wed May 20 18:11:31 2015
@@ -55,63 +55,6 @@ public class DocumentDictionaryTest exte
   static final String PAYLOAD_FIELD_NAME = "p1";
   static final String CONTEXT_FIELD_NAME = "c1";
   
-  /** Returns Pair(list of invalid document terms, Map of document term -&gt; document) */
-  private Map.Entry<List<String>, Map<String, Document>> generateIndexDocuments(int ndocs, boolean requiresPayload, boolean requiresContexts) {
-    Map<String, Document> docs = new HashMap<>();
-    List<String> invalidDocTerms = new ArrayList<>();
-    for(int i = 0; i < ndocs ; i++) {
-      Document doc = new Document();
-      boolean invalidDoc = false;
-      Field field = null;
-      // usually have valid term field in document
-      if (usually()) {
-        field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
-        doc.add(field);
-      } else {
-        invalidDoc = true;
-      }
-      
-      // even if payload is not required usually have it
-      if (requiresPayload || usually()) {
-        // usually have valid payload field in document
-        if (usually()) {
-          Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
-          doc.add(payload);
-        } else if (requiresPayload) {
-          invalidDoc = true;
-        }
-      }
-      
-      if (requiresContexts || usually()) {
-        if (usually()) {
-          for (int j = 0; j < atLeast(2); j++) {
-            doc.add(new StoredField(CONTEXT_FIELD_NAME, new BytesRef("context_" + i + "_"+ j)));
-          }
-        }
-        // we should allow entries without context
-      }
-      
-      // usually have valid weight field in document
-      if (usually()) {
-        Field weight = (rarely()) ? 
-            new StoredField(WEIGHT_FIELD_NAME, 100d + i) : 
-            new NumericDocValuesField(WEIGHT_FIELD_NAME, 100 + i);
-        doc.add(weight);
-      }
-      
-      String term = null;
-      if (invalidDoc) {
-        term = (field!=null) ? field.stringValue() : "invalid_" + i;
-        invalidDocTerms.add(term);
-      } else {
-        term = field.stringValue();
-      }
-      
-      docs.put(term, doc);
-    }
-    return new SimpleEntry<>(invalidDocTerms, docs);
-  }
-  
   @Test
   public void testEmptyReader() throws IOException {
     Directory dir = newDirectory();
@@ -140,7 +83,7 @@ public class DocumentDictionaryTest exte
     IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true, false);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
     Map<String, Document> docs = res.getValue();
     List<String> invalidDocTerms = res.getKey();
     for(Document doc: docs.values()) {
@@ -157,7 +100,9 @@ public class DocumentDictionaryTest exte
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
       IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
       assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
-      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+      if (payloadField == null) assertTrue(inputIterator.payload().length == 0);
+      else assertEquals(inputIterator.payload(), payloadField.binaryValue());
     }
     
     for (String invalidTerm : invalidDocTerms) {
@@ -167,6 +112,41 @@ public class DocumentDictionaryTest exte
     
     IOUtils.close(ir, analyzer, dir);
   }
+
+  @Test
+  public void testWithOptionalPayload() throws IOException {
+    Directory dir = newDirectory();
+    Analyzer analyzer = new MockAnalyzer(random());
+    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+
+    // Create a document that is missing the payload field
+    Document doc = new Document();
+    Field field = new TextField(FIELD_NAME, "some field", Field.Store.YES);
+    doc.add(field);
+    // do not store the payload or the contexts
+    Field weight = new NumericDocValuesField(WEIGHT_FIELD_NAME, 100);
+    doc.add(weight);
+    writer.addDocument(doc);
+    writer.commit();
+    writer.close();
+    IndexReader ir = DirectoryReader.open(dir);
+
+    // Even though the payload field is missing, the dictionary iterator should not skip the document
+    // because the payload field is optional.
+    Dictionary dictionaryOptionalPayload =
+        new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME);
+    InputIterator inputIterator = dictionaryOptionalPayload.getEntryIterator();
+    BytesRef f = inputIterator.next();
+    assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
+    IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
+    assertEquals(inputIterator.weight(), weightField.numericValue().longValue());
+    IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+    assertNull(payloadField);
+    assertTrue(inputIterator.payload().length == 0);
+    IOUtils.close(ir, analyzer, dir);
+  }
  
   @Test
   public void testWithoutPayload() throws IOException {
@@ -175,7 +155,7 @@ public class DocumentDictionaryTest exte
     IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false, false);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
     Map<String, Document> docs = res.getValue();
     List<String> invalidDocTerms = res.getKey();
     for(Document doc: docs.values()) {
@@ -192,7 +172,7 @@ public class DocumentDictionaryTest exte
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
       IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
       assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
-      assertEquals(inputIterator.payload(), null);
+      assertNull(inputIterator.payload());
     }
     
     for (String invalidTerm : invalidDocTerms) {
@@ -211,7 +191,7 @@ public class DocumentDictionaryTest exte
     IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true, true);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true);
     Map<String, Document> docs = res.getValue();
     List<String> invalidDocTerms = res.getKey();
     for(Document doc: docs.values()) {
@@ -228,7 +208,9 @@ public class DocumentDictionaryTest exte
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
       IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
       assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
-      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+      if (payloadField == null) assertTrue(inputIterator.payload().length == 0);
+      else assertEquals(inputIterator.payload(), payloadField.binaryValue());
       Set<BytesRef> oriCtxs = new HashSet<>();
       Set<BytesRef> contextSet = inputIterator.contexts();
       for (IndexableField ctxf : doc.getFields(CONTEXT_FIELD_NAME)) {
@@ -252,7 +234,7 @@ public class DocumentDictionaryTest exte
     IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false, false);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
     Map<String, Document> docs = res.getValue();
     List<String> invalidDocTerms = res.getKey();
     Random rand = random();
@@ -291,7 +273,7 @@ public class DocumentDictionaryTest exte
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
       IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
       assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
-      assertEquals(inputIterator.payload(), null);
+      assertNull(inputIterator.payload());
     }
     
     for (String invalidTerm : invalidDocTerms) {
@@ -324,13 +306,65 @@ public class DocumentDictionaryTest exte
       assertTrue(f.equals(nextSuggestion.term));
       long weight = nextSuggestion.weight;
       assertEquals(inputIterator.weight(), (weight != -1) ? weight : 0);
-      assertTrue(inputIterator.payload().equals(nextSuggestion.payload));
+      assertEquals(inputIterator.payload(), nextSuggestion.payload);
       assertTrue(inputIterator.contexts().equals(nextSuggestion.contexts));
     }
     assertFalse(suggestionsIter.hasNext());
     IOUtils.close(ir, analyzer, dir);
   }
 
+  /** Returns Pair(list of invalid document terms, Map of document term -&gt; document) */
+  private Map.Entry<List<String>, Map<String, Document>> generateIndexDocuments(int ndocs, boolean requiresContexts) {
+    Map<String, Document> docs = new HashMap<>();
+    List<String> invalidDocTerms = new ArrayList<>();
+    for(int i = 0; i < ndocs ; i++) {
+      Document doc = new Document();
+      boolean invalidDoc = false;
+      Field field = null;
+      // usually have valid term field in document
+      if (usually()) {
+        field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
+        doc.add(field);
+      } else {
+        invalidDoc = true;
+      }
+
+      // even if payload is not required usually have it
+      if (usually()) {
+        Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
+        doc.add(payload);
+      }
+
+      if (requiresContexts || usually()) {
+        if (usually()) {
+          for (int j = 0; j < atLeast(2); j++) {
+            doc.add(new StoredField(CONTEXT_FIELD_NAME, new BytesRef("context_" + i + "_"+ j)));
+          }
+        }
+        // we should allow entries without context
+      }
+
+      // usually have valid weight field in document
+      if (usually()) {
+        Field weight = (rarely()) ?
+                new StoredField(WEIGHT_FIELD_NAME, 100d + i) :
+                new NumericDocValuesField(WEIGHT_FIELD_NAME, 100 + i);
+        doc.add(weight);
+      }
+
+      String term = null;
+      if (invalidDoc) {
+        term = (field!=null) ? field.stringValue() : "invalid_" + i;
+        invalidDocTerms.add(term);
+      } else {
+        term = field.stringValue();
+      }
+
+      docs.put(term, doc);
+    }
+    return new SimpleEntry<>(invalidDocTerms, docs);
+  }
+
   private List<Suggestion> indexMultiValuedDocuments(int numDocs, RandomIndexWriter writer) throws IOException {
     List<Suggestion> suggestionList = new ArrayList<>(numDocs);
 

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java?rev=1680645&r1=1680644&r2=1680645&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java Wed May 20 18:11:31 2015
@@ -58,31 +58,6 @@ public class DocumentValueSourceDictiona
   static final String WEIGHT_FIELD_NAME_3 = "w3";
   static final String PAYLOAD_FIELD_NAME = "p1";
   static final String CONTEXTS_FIELD_NAME = "c1";
-
-  private Map<String, Document> generateIndexDocuments(int ndocs) {
-    Map<String, Document> docs = new HashMap<>();
-    for(int i = 0; i < ndocs ; i++) {
-      Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
-      Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
-      Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
-      Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
-      Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
-      Field contexts = new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_"  + i + "_0"));
-      Document doc = new Document();
-      doc.add(field);
-      doc.add(payload);
-      doc.add(weight1);
-      doc.add(weight2);
-      doc.add(weight3);
-      doc.add(contexts);
-      for(int j = 1; j < atLeast(3); j++) {
-        contexts.setBytesValue(new BytesRef("ctx_" + i + "_" + j));
-        doc.add(contexts);
-      }
-      docs.put(field.stringValue(), doc);
-    }
-    return docs;
-  }
   
   @Test
   public void testEmptyReader() throws IOException {
@@ -131,7 +106,9 @@ public class DocumentValueSourceDictiona
       long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
       assertEquals(inputIterator.weight(), (w1 + w2 + w3));
-      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+      if (payloadField == null) assertTrue(inputIterator.payload().length == 0);
+      else assertEquals(inputIterator.payload(), payloadField.binaryValue());
     }
     assertTrue(docs.isEmpty());
     IOUtils.close(ir, analyzer, dir);
@@ -163,7 +140,9 @@ public class DocumentValueSourceDictiona
       long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
       assertEquals(inputIterator.weight(), (w1 + w2 + w3));
-      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+      if (payloadField == null) assertTrue(inputIterator.payload().length == 0);
+      else assertEquals(inputIterator.payload(), payloadField.binaryValue());
       Set<BytesRef> originalCtxs = new HashSet<>();
       for (IndexableField ctxf: doc.getFields(CONTEXTS_FIELD_NAME)) {
         originalCtxs.add(ctxf.binaryValue());
@@ -200,7 +179,7 @@ public class DocumentValueSourceDictiona
       long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
       assertEquals(inputIterator.weight(), (w1 + w2 + w3));
-      assertEquals(inputIterator.payload(), null);
+      assertNull(inputIterator.payload());
     }
     assertTrue(docs.isEmpty());
     IOUtils.close(ir, analyzer, dir);
@@ -253,7 +232,9 @@ public class DocumentValueSourceDictiona
       long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
       assertEquals(inputIterator.weight(), w2+w1);
-      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+      if (payloadField == null) assertTrue(inputIterator.payload().length == 0);
+      else assertEquals(inputIterator.payload(), payloadField.binaryValue());
     }
     assertTrue(docs.isEmpty());
     IOUtils.close(ir, analyzer, dir);
@@ -281,10 +262,39 @@ public class DocumentValueSourceDictiona
       Document doc = docs.remove(f.utf8ToString());
       assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
       assertEquals(inputIterator.weight(), 10);
-      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      IndexableField payloadField = doc.getField(PAYLOAD_FIELD_NAME);
+      if (payloadField == null) assertTrue(inputIterator.payload().length == 0);
+      else assertEquals(inputIterator.payload(), payloadField.binaryValue());
     }
     assertTrue(docs.isEmpty());
     IOUtils.close(ir, analyzer, dir);
   }
-  
+
+  private Map<String, Document> generateIndexDocuments(int ndocs) {
+    Map<String, Document> docs = new HashMap<>();
+    for(int i = 0; i < ndocs ; i++) {
+      Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
+      Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
+      Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
+      Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
+      Field contexts = new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_"  + i + "_0"));
+      Document doc = new Document();
+      doc.add(field);
+      // even if payload is not required usually have it
+      if (usually()) {
+        Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
+        doc.add(payload);
+      }
+      doc.add(weight1);
+      doc.add(weight2);
+      doc.add(weight3);
+      doc.add(contexts);
+      for(int j = 1; j < atLeast(3); j++) {
+        contexts.setBytesValue(new BytesRef("ctx_" + i + "_" + j));
+        doc.add(contexts);
+      }
+      docs.put(field.stringValue(), doc);
+    }
+    return docs;
+  }
 }