You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/03/23 16:43:11 UTC
svn commit: r1580517 [1/2] - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/document/ lucene/suggest/ lucene/suggest/src/java/org/apache/lucene/search/spell/ lucene/suggest/src/java/org/apache/lucene/s...

Author: mikemccand
Date: Sun Mar 23 15:43:10 2014
New Revision: 1580517

URL: http://svn.apache.org/r1580517
Log:
LUCENE-5528: add contexts to AnalyzingInfixSuggester

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/StringField.java
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggesterTest.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Sun Mar 23 15:43:10 2014
@@ -74,6 +74,11 @@ New Features
   first pass search using scores from a more costly second pass
   search. (Simon Willnauer, Robert Muir, Mike McCandless)
 
+* LUCENE-5528: Add context to suggesters (InputIterator and Lookup
+  classes), and fix AnalyzingInfixSuggester to handle contexts.
+  Suggester contexts allow you to filter suggestions.  (Areek Zillur,
+  Mike McCandless)
+
 * LUCENE-5545: Add SortRescorer and Expression.getRescorer, to
   resort the hits from a first pass search using a Sort or an
   Expression. (Simon Willnauer, Robert Muir, Mike McCandless)

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/StringField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/StringField.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/StringField.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/document/StringField.java Sun Mar 23 15:43:10 2014
@@ -1,7 +1,5 @@
 package org.apache.lucene.document;
 
-import org.apache.lucene.index.FieldInfo.IndexOptions;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -19,6 +17,8 @@ import org.apache.lucene.index.FieldInfo
  * limitations under the License.
  */
 
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+
 /** A field that is indexed but not tokenized: the entire
  *  String value is indexed as a single token.  For example
  *  this might be used for a 'country' field or an 'id'

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java Sun Mar 23 15:43:10 2014
@@ -19,13 +19,13 @@ package org.apache.lucene.search.spell;
 
 import java.io.IOException;
 import java.util.Comparator;
+import java.util.Set;
 
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.search.suggest.InputIterator;
-import org.apache.lucene.util.BytesRefIterator;
 import org.apache.lucene.util.BytesRef;
 
 /**
@@ -119,5 +119,15 @@ public class HighFrequencyDictionary imp
     public boolean hasPayloads() {
       return false;
     }
+
+    @Override
+    public Set<BytesRef> contexts() {
+      return null;
+    }
+
+    @Override
+    public boolean hasContexts() {
+      return false;
+    }
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java Sun Mar 23 15:43:10 2014
@@ -19,6 +19,9 @@ package org.apache.lucene.search.suggest
 
 import java.io.IOException;
 import java.util.Comparator;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
@@ -35,6 +38,8 @@ public class BufferedInputIterator imple
   protected BytesRefArray entries = new BytesRefArray(Counter.newCounter());
   /** buffered payload entries */
   protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter());
+  /** buffered context set entries */
+  protected List<Set<BytesRef>> contextSets = new ArrayList<>();
   /** current buffer position */
   protected int curPos = -1;
   /** buffered weights, parallel with {@link #entries} */
@@ -44,16 +49,22 @@ public class BufferedInputIterator imple
   private final boolean hasPayloads;
   private final Comparator<BytesRef> comp;
 
+  private final boolean hasContexts;
+
   /** Creates a new iterator, buffering entries from the specified iterator */
   public BufferedInputIterator(InputIterator source) throws IOException {
     BytesRef spare;
     int freqIndex = 0;
     hasPayloads = source.hasPayloads();
+    hasContexts = source.hasContexts();
     while((spare = source.next()) != null) {
       entries.append(spare);
       if (hasPayloads) {
         payloads.append(source.payload());
       }
+      if (hasContexts) {
+        contextSets.add(source.contexts());
+      }
       if (freqIndex >= freqs.length) {
         freqs = ArrayUtil.grow(freqs, freqs.length+1);
       }
@@ -93,4 +104,17 @@ public class BufferedInputIterator imple
   public Comparator<BytesRef> getComparator() {
     return comp;
   }
+
+  @Override
+  public Set<BytesRef> contexts() {
+    if (hasContexts && curPos < contextSets.size()) {
+      return contextSets.get(curPos);
+    }
+    return null;
+  }
+
+  @Override
+  public boolean hasContexts() {
+    return hasContexts;
+  }
 }

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java Sun Mar 23 15:43:10 2014
@@ -22,6 +22,7 @@ import java.util.HashSet;
 import java.util.Set;
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.MultiDocValues;
@@ -30,12 +31,11 @@ import org.apache.lucene.index.NumericDo
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefIterator;
 
 /**
  * <p>
- * Dictionary with terms, weights and optionally payload information 
- * taken from stored/indexed fields in a Lucene index.
+ * Dictionary with terms, weights, payload (optional) and contexts (optional)
+ * information taken from stored/indexed fields in a Lucene index.
  * </p>
  * <b>NOTE:</b> 
  *  <ul>
@@ -61,6 +61,8 @@ public class DocumentDictionary implemen
 
   /** Field to read payload from */
   protected final String payloadField;
+  /** Field to read contexts from */
+  protected final String contextsField;
   private final String field;
   private final String weightField;
   
@@ -80,15 +82,26 @@ public class DocumentDictionary implemen
    * for the entry.
    */
   public DocumentDictionary(IndexReader reader, String field, String weightField, String payloadField) {
+    this(reader, field, weightField, payloadField, null);
+  }
+
+  /**
+   * Creates a new dictionary with the contents of the fields named <code>field</code>
+   * for the terms, <code>weightField</code> for the weights that will be used for
+   * the corresponding terms, <code>payloadField</code> for the corresponding payloads
+   * for the entry and <code>contextsField</code> for associated contexts.
+   */
+  public DocumentDictionary(IndexReader reader, String field, String weightField, String payloadField, String contextsField) {
     this.reader = reader;
     this.field = field;
     this.weightField = weightField;
     this.payloadField = payloadField;
+    this.contextsField = contextsField;
   }
-  
+
   @Override
   public InputIterator getEntryIterator() throws IOException {
-    return new DocumentInputIterator(payloadField!=null);
+    return new DocumentInputIterator(payloadField!=null, contextsField!=null);
   }
 
   /** Implements {@link InputIterator} from stored fields. */
@@ -97,24 +110,27 @@ public class DocumentDictionary implemen
     private final int docCount;
     private final Set<String> relevantFields;
     private final boolean hasPayloads;
+    private final boolean hasContexts;
     private final Bits liveDocs;
     private int currentDocId = -1;
     private long currentWeight;
     private BytesRef currentPayload;
+    private Set<BytesRef> currentContexts;
     private final NumericDocValues weightValues;
-    
+
     
     /**
      * Creates an iterator over term, weight and payload fields from the lucene
      * index. setting <code>withPayload</code> to false, implies an iterator
      * over only term and weight.
      */
-    public DocumentInputIterator(boolean hasPayloads) throws IOException {
+    public DocumentInputIterator(boolean hasPayloads, boolean hasContexts) throws IOException {
       this.hasPayloads = hasPayloads;
+      this.hasContexts = hasContexts;
       docCount = reader.maxDoc() - 1;
       weightValues = (weightField != null) ? MultiDocValues.getNumericValues(reader, weightField) : null;
       liveDocs = (reader.leaves().size() > 0) ? MultiFields.getLiveDocs(reader) : null;
-      relevantFields = getRelevantFields(new String [] {field, weightField, payloadField});
+      relevantFields = getRelevantFields(new String [] {field, weightField, payloadField, contextsField});
     }
 
     @Override
@@ -136,10 +152,11 @@ public class DocumentDictionary implemen
         }
 
         Document doc = reader.document(currentDocId, relevantFields);
-        
+
         BytesRef tempPayload = null;
         BytesRef tempTerm = null;
-        
+        Set<BytesRef> tempContexts = new HashSet<>();
+
         if (hasPayloads) {
           IndexableField payload = doc.getField(payloadField);
           if (payload == null || (payload.binaryValue() == null && payload.stringValue() == null)) {
@@ -147,16 +164,28 @@ public class DocumentDictionary implemen
           }
           tempPayload = (payload.binaryValue() != null) ? payload.binaryValue() : new BytesRef(payload.stringValue());
         }
-        
+
+        if (hasContexts) {
+          final IndexableField[] contextFields = doc.getFields(contextsField);
+          for (IndexableField contextField : contextFields) {
+            if (contextField.binaryValue() == null && contextField.stringValue() == null) {
+              continue;
+            } else {
+              tempContexts.add((contextField.binaryValue() != null) ? contextField.binaryValue() : new BytesRef(contextField.stringValue()));
+            }
+          }
+        }
+
         IndexableField fieldVal = doc.getField(field);
         if (fieldVal == null || (fieldVal.binaryValue() == null && fieldVal.stringValue() == null)) {
           continue;
         }
         tempTerm = (fieldVal.stringValue() != null) ? new BytesRef(fieldVal.stringValue()) : fieldVal.binaryValue();
-        
+
         currentPayload = tempPayload;
+        currentContexts = tempContexts;
         currentWeight = getWeight(doc, currentDocId);
-        
+
         return tempTerm;
       }
       return null;
@@ -198,5 +227,18 @@ public class DocumentDictionary implemen
       }
       return relevantFields;
     }
+
+    @Override
+    public Set<BytesRef> contexts() {
+      if (hasContexts) {
+        return currentContexts;
+      }
+      return null;
+    }
+
+    @Override
+    public boolean hasContexts() {
+      return hasContexts;
+    }
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java Sun Mar 23 15:43:10 2014
@@ -27,7 +27,6 @@ import org.apache.lucene.index.IndexRead
 import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.queries.function.FunctionValues;
 import org.apache.lucene.queries.function.ValueSource;
-import org.apache.lucene.util.BytesRefIterator;
 
 
 /**
@@ -70,6 +69,17 @@ public class DocumentValueSourceDictiona
   
   /**
    * Creates a new dictionary with the contents of the fields named <code>field</code>
+   * for the terms, <code>payload</code> for the corresponding payloads, <code>contexts</code>
+   * for the associated contexts and uses the <code>weightsValueSource</code> supplied 
+   * to determine the score.
+   */
+  public DocumentValueSourceDictionary(IndexReader reader, String field,
+                                       ValueSource weightsValueSource, String payload, String contexts) {
+    super(reader, field, null, payload, contexts);
+    this.weightsValueSource = weightsValueSource;
+  }
+  /**
+   * Creates a new dictionary with the contents of the fields named <code>field</code>
    * for the terms, <code>payloadField</code> for the corresponding payloads
    * and uses the <code>weightsValueSource</code> supplied to determine the 
    * score.
@@ -77,7 +87,7 @@ public class DocumentValueSourceDictiona
   public DocumentValueSourceDictionary(IndexReader reader, String field,
                                        ValueSource weightsValueSource, String payload) {
     super(reader, field, null, payload);
-    this.weightsValueSource = weightsValueSource;  
+    this.weightsValueSource = weightsValueSource;
   }
   
   /** 
@@ -93,7 +103,7 @@ public class DocumentValueSourceDictiona
   
   @Override
   public InputIterator getEntryIterator() throws IOException {
-    return new DocumentValueSourceInputIterator(payloadField!=null);
+    return new DocumentValueSourceInputIterator(payloadField!=null, contextsField!=null);
   }
   
   final class DocumentValueSourceInputIterator extends DocumentDictionary.DocumentInputIterator {
@@ -105,10 +115,10 @@ public class DocumentValueSourceDictiona
     private final int[] starts;
     /** current leave index */
     private int currentLeafIndex = 0;
-    
-    public DocumentValueSourceInputIterator(boolean hasPayloads)
+
+    public DocumentValueSourceInputIterator(boolean hasPayloads, boolean hasContexts)
         throws IOException {
-      super(hasPayloads);
+      super(hasPayloads, hasContexts);
       leaves = reader.leaves();
       starts = new int[leaves.size() + 1];
       for (int i = 0; i < leaves.size(); i++) {

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java Sun Mar 23 15:43:10 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.search.suggest
 
 import java.io.*;
 import java.util.Comparator;
+import java.util.Set;
 
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.util.BytesRef;
@@ -215,6 +216,16 @@ public class FileDictionary implements D
         curWeight = (long)Double.parseDouble(weight);
       }
     }
+
+    @Override
+    public Set<BytesRef> contexts() {
+      return null;
+    }
+
+    @Override
+    public boolean hasContexts() {
+      return false;
+    }
   }
 
 }

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java Sun Mar 23 15:43:10 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest
 
 import java.io.IOException;
 import java.util.Comparator;
+import java.util.Set;
 
 import org.apache.lucene.search.suggest.Lookup.LookupResult; // javadocs
 import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester; // javadocs
@@ -45,6 +46,15 @@ public interface InputIterator extends B
   /** Returns true if the iterator has payloads */
   public boolean hasPayloads();
   
+  /** 
+   * A term's contexts; a context can be used to filter suggestions.
+   * May return null if suggest entries do not have any context.
+   * */
+  public Set<BytesRef> contexts();
+  
+  /** Returns true if the iterator has contexts */
+  public boolean hasContexts();
+  
   /** Singleton InputIterator that iterates over 0 BytesRefs. */
   public static final InputIterator EMPTY = new InputIteratorWrapper(BytesRefIterator.EMPTY);
   
@@ -88,5 +98,15 @@ public interface InputIterator extends B
     public Comparator<BytesRef> getComparator() {
       return wrapped.getComparator();
     }
+
+    @Override
+    public Set<BytesRef> contexts() {
+      return null;
+    }
+
+    @Override
+    public boolean hasContexts() {
+      return false;
+    }
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java Sun Mar 23 15:43:10 2014
@@ -22,6 +22,7 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.util.Comparator;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.store.DataInput;
@@ -40,6 +41,7 @@ public abstract class Lookup {
 
   /**
    * Result of a lookup.
+   * @lucene.experimental
    */
   public static final class LookupResult implements Comparable<LookupResult> {
     /** the key's text */
@@ -55,31 +57,53 @@ public abstract class Lookup {
     /** the key's payload (null if not present) */
     public final BytesRef payload;
     
+    /** the key's contexts (null if not present) */
+    public final Set<BytesRef> contexts;
+    
     /**
      * Create a new result from a key+weight pair.
      */
     public LookupResult(CharSequence key, long value) {
-      this(key, value, null);
+      this(key, null, value, null, null);
     }
 
     /**
      * Create a new result from a key+weight+payload triple.
      */
     public LookupResult(CharSequence key, long value, BytesRef payload) {
-      this.key = key;
-      this.highlightKey = null;
-      this.value = value;
-      this.payload = payload;
+      this(key, null, value, payload, null);
     }
-
+    
     /**
      * Create a new result from a key+highlightKey+weight+payload triple.
      */
     public LookupResult(CharSequence key, Object highlightKey, long value, BytesRef payload) {
+      this(key, highlightKey, value, payload, null);
+    }
+    
+    /**
+     * Create a new result from a key+weight+payload+contexts quadruple.
+     */
+    public LookupResult(CharSequence key, long value, BytesRef payload, Set<BytesRef> contexts) {
+      this(key, null, value, payload, contexts);
+    }
+
+    /**
+     * Create a new result from a key+weight+contexts triple.
+     */
+    public LookupResult(CharSequence key, long value, Set<BytesRef> contexts) {
+      this(key, null, value, null, contexts);
+    }
+    
+    /**
+     * Create a new result from a key+highlightKey+weight+payload+contexts tuple.
+     */
+    public LookupResult(CharSequence key, Object highlightKey, long value, BytesRef payload, Set<BytesRef> contexts) {
       this.key = key;
       this.highlightKey = highlightKey;
       this.value = value;
       this.payload = payload;
+      this.contexts = contexts;
     }
 
     @Override
@@ -211,7 +235,20 @@ public abstract class Lookup {
    * @param num maximum number of results to return
    * @return a list of possible completions, with their relative weight (e.g. popularity)
    */
-  public abstract List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) throws IOException;
+  public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) throws IOException {
+    return lookup(key, null, onlyMorePopular, num);
+  }
+
+  /**
+   * Look up a key and return possible completion for this key.
+   * @param key lookup key. Depending on the implementation this may be
+   * a prefix, misspelling, or even infix.
+   * @param contexts contexts to filter the lookup by, or null if all contexts are allowed; if the suggestion contains any of the contexts, it's a match
+   * @param onlyMorePopular return only more popular results
+   * @param num maximum number of results to return
+   * @return a list of possible completions, with their relative weight (e.g. popularity)
+   */
+  public abstract List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) throws IOException;
 
   /**
    * Persist the constructed lookup data to a directory. Optional operation.
@@ -235,4 +272,5 @@ public abstract class Lookup {
    * @return ram size of the lookup implementation in bytes
    */
   public abstract long sizeInBytes();
+
 }

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java Sun Mar 23 15:43:10 2014
@@ -20,6 +20,8 @@ package org.apache.lucene.search.suggest
 import java.io.File;
 import java.io.IOException;
 import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Set;
 
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.ByteArrayDataOutput;
@@ -42,11 +44,13 @@ public class SortedInputIterator impleme
   private final ByteSequencesReader reader;
   private final Comparator<BytesRef> comparator;
   private final boolean hasPayloads;
+  private final boolean hasContexts;
   private boolean done = false;
   
   private long weight;
   private final BytesRef scratch = new BytesRef();
   private BytesRef payload = new BytesRef();
+  private Set<BytesRef> contexts = null;
   
   /**
    * Creates a new sorted wrapper, using {@link
@@ -62,6 +66,7 @@ public class SortedInputIterator impleme
    */
   public SortedInputIterator(InputIterator source, Comparator<BytesRef> comparator) throws IOException {
     this.hasPayloads = source.hasPayloads();
+    this.hasContexts = source.hasContexts();
     this.source = source;
     this.comparator = comparator;
     this.reader = sort();
@@ -80,6 +85,9 @@ public class SortedInputIterator impleme
         if (hasPayloads) {
           payload = decodePayload(scratch, input);
         }
+        if (hasContexts) {
+          contexts = decodeContexts(scratch, input);
+        }
         success = true;
         return scratch;
       }
@@ -111,12 +119,22 @@ public class SortedInputIterator impleme
   public boolean hasPayloads() {
     return hasPayloads;
   }
+  
+  @Override
+  public Set<BytesRef> contexts() {
+    return contexts;
+  }
 
   @Override
   public Comparator<BytesRef> getComparator() {
     return tieBreakByCostComparator;
   }
 
+  @Override
+  public boolean hasContexts() {
+    return hasContexts;
+  }
+
   /** Sorts by BytesRef (ascending) then cost (ascending). */
   private final Comparator<BytesRef> tieBreakByCostComparator = new Comparator<BytesRef>() {
 
@@ -139,6 +157,10 @@ public class SortedInputIterator impleme
         decodePayload(leftScratch, input);
         decodePayload(rightScratch, input);
       }
+      if (hasContexts) {
+        decodeContexts(leftScratch, input);
+        decodeContexts(rightScratch, input);
+      }
       int cmp = comparator.compare(leftScratch, rightScratch);
       if (cmp != 0) {
         return cmp;
@@ -168,7 +190,7 @@ public class SortedInputIterator impleme
       ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
 
       while ((spare = source.next()) != null) {
-        encode(writer, output, buffer, spare, source.payload(), source.weight());
+        encode(writer, output, buffer, spare, source.payload(), source.contexts(), source.weight());
       }
       writer.close();
       new OfflineSorter(tieBreakByCostComparator).sort(tempInput, tempSorted);
@@ -199,9 +221,15 @@ public class SortedInputIterator impleme
     }
   }
   
-  /** encodes an entry (bytes+(payload)+weight) to the provided writer */
-  protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException {
+  /** encodes an entry (bytes+(payload)+(contexts)+weight) to the provided writer */
+  protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, Set<BytesRef> contexts, long weight) throws IOException {
     int requiredLength = spare.length + 8 + ((hasPayloads) ? 2 + payload.length : 0);
+    if (hasContexts) {
+      for(BytesRef ctx : contexts) {
+        requiredLength += 2 + ctx.length;
+      }
+      requiredLength += 2; // for length of contexts
+    }
     if (requiredLength >= buffer.length) {
       buffer = ArrayUtil.grow(buffer, requiredLength);
     }
@@ -211,6 +239,13 @@ public class SortedInputIterator impleme
       output.writeBytes(payload.bytes, payload.offset, payload.length);
       output.writeShort((short) payload.length);
     }
+    if (hasContexts) {
+      for (BytesRef ctx : contexts) {
+        output.writeBytes(ctx.bytes, ctx.offset, ctx.length);
+        output.writeShort((short) ctx.length);
+      }
+      output.writeShort((short) contexts.size());
+    }
     output.writeLong(weight);
     writer.write(buffer, 0, output.getPosition());
   }
@@ -223,6 +258,27 @@ public class SortedInputIterator impleme
     return tmpInput.readLong();
   }
   
+  /** decodes the contexts at the current position */
+  protected Set<BytesRef> decodeContexts(BytesRef scratch, ByteArrayDataInput tmpInput) {
+    tmpInput.reset(scratch.bytes);
+    tmpInput.skipBytes(scratch.length - 2); //skip to context set size
+    short ctxSetSize = tmpInput.readShort();
+    scratch.length -= 2;
+    final Set<BytesRef> contextSet = new HashSet<>();
+    for (short i = 0; i < ctxSetSize; i++) {
+      tmpInput.setPosition(scratch.length - 2);
+      short curContextLength = tmpInput.readShort();
+      scratch.length -= 2;
+      tmpInput.setPosition(scratch.length - curContextLength);
+      BytesRef contextSpare = new BytesRef(curContextLength);
+      tmpInput.readBytes(contextSpare.bytes, 0, curContextLength);
+      contextSpare.length = curContextLength;
+      contextSet.add(contextSpare);
+      scratch.length -= curContextLength;
+    }
+    return contextSet;
+  }
+  
   /** decodes the payload at the current position */
   protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) {
     tmpInput.reset(scratch.bytes);

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java Sun Mar 23 15:43:10 2014
@@ -19,6 +19,7 @@ package org.apache.lucene.search.suggest
 
 import java.io.IOException;
 import java.util.Random;
+import java.util.Set;
 
 import org.apache.lucene.util.BytesRef;
 
@@ -75,4 +76,13 @@ public class UnsortedInputIterator exten
     }
     return null;
   }
+  
+  @Override
+  public Set<BytesRef> contexts() {
+    if (hasContexts() && curPos < contextSets.size()) {
+      assert currentOrd == ords[curPos];
+      return contextSets.get(currentOrd);
+    }
+    return null;
+  }
 }

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java Sun Mar 23 15:43:10 2014
@@ -38,6 +38,7 @@ import org.apache.lucene.document.Docume
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.AtomicReader;
@@ -49,7 +50,9 @@ import org.apache.lucene.index.FilterAto
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.MultiDocValues;
+import org.apache.lucene.index.ReaderUtil;
 import org.apache.lucene.index.SegmentReader;
+import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.sorter.EarlyTerminatingSortingCollector;
 import org.apache.lucene.index.sorter.SortingMergePolicy;
@@ -96,7 +99,12 @@ import org.apache.lucene.util.Version;
  *  by the suggest weight; it would be nice to support
  *  blended score + weight sort in the future.  This means
  *  this suggester best applies when there is a strong
- *  apriori ranking of all the suggestions. */
+ *  apriori ranking of all the suggestions.
+ *
+ *  <p>This suggester supports contexts; however, the
+ *  contexts must be valid UTF-8 (arbitrary binary terms will
+ *  not work).
+ * @lucene.experimental */    
 
 public class AnalyzingInfixSuggester extends Lookup implements Closeable {
 
@@ -104,9 +112,13 @@ public class AnalyzingInfixSuggester ext
   protected final static String TEXT_FIELD_NAME = "text";
 
   /** Field name used for the indexed text, as a
-   * StringField, for exact lookup. */
+   *  StringField, for exact lookup. */
   protected final static String EXACT_TEXT_FIELD_NAME = "exacttext";
 
+  /** Field name used for the indexed contexts, as a
+   *  StringField and a SortedSetDocValuesField, for filtering. */
+  protected final static String CONTEXTS_FIELD_NAME = "contexts";
+
   /** Analyzer used at search time */
   protected final Analyzer queryAnalyzer;
   /** Analyzer used at index time */
@@ -188,7 +200,7 @@ public class AnalyzingInfixSuggester ext
 
   @Override
   public void build(InputIterator iter) throws IOException {
-
+    
     if (searcherMgr != null) {
       searcherMgr.close();
       searcherMgr = null;
@@ -206,45 +218,21 @@ public class AnalyzingInfixSuggester ext
       // just indexing the suggestions as they iterate:
       writer = new IndexWriter(dir,
                                getIndexWriterConfig(matchVersion, getGramAnalyzer(), IndexWriterConfig.OpenMode.CREATE));
-      BytesRef text;
-      Document doc = new Document();
-      FieldType ft = getTextFieldType();
-      Field textField = new Field(TEXT_FIELD_NAME, "", ft);
-      doc.add(textField);
-
-      Field textGramField = new Field("textgrams", "", ft);
-      doc.add(textGramField);
-
-      Field exactTextField = new StringField(EXACT_TEXT_FIELD_NAME, "", Field.Store.NO);
-      doc.add(exactTextField);
-
-      Field textDVField = new BinaryDocValuesField(TEXT_FIELD_NAME, new BytesRef());
-      doc.add(textDVField);
-
-      // TODO: use threads...?
-      Field weightField = new NumericDocValuesField("weight", 0L);
-      doc.add(weightField);
-
-      Field payloadField;
-      if (iter.hasPayloads()) {
-        payloadField = new BinaryDocValuesField("payloads", new BytesRef());
-        doc.add(payloadField);
-      } else {
-        payloadField = null;
-      }
       //long t0 = System.nanoTime();
+
+      // TODO: use threads?
+      BytesRef text;
       while ((text = iter.next()) != null) {
-        String textString = text.utf8ToString();
-        textField.setStringValue(textString);
-        exactTextField.setStringValue(textString);
-        textGramField.setStringValue(textString);
-        textDVField.setBytesValue(text);
-        weightField.setLongValue(iter.weight());
+        BytesRef payload;
         if (iter.hasPayloads()) {
-          payloadField.setBytesValue(iter.payload());
+          payload = iter.payload();
+        } else {
+          payload = null;
         }
-        writer.addDocument(doc);
+
+        add(text, iter.contexts(), iter.weight(), payload);
       }
+
       //System.out.println("initial indexing time: " + ((System.nanoTime()-t0)/1000000) + " msec");
 
       searcherMgr = new SearcherManager(writer, true, null);
@@ -285,19 +273,8 @@ public class AnalyzingInfixSuggester ext
    *  After adding or updating a batch of new suggestions,
    *  you must call {@link #refresh} in the end in order to
    *  see the suggestions in {@link #lookup} */
-  public void add(BytesRef text, long weight, BytesRef payload) throws IOException {
-    String textString = text.utf8ToString();
-    Document doc = new Document();
-    FieldType ft = getTextFieldType();
-    doc.add(new Field(TEXT_FIELD_NAME, textString, ft));
-    doc.add(new Field("textgrams", textString, ft));
-    doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO));
-    doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text));
-    doc.add(new NumericDocValuesField("weight", weight));
-    if (payload != null) {
-      doc.add(new BinaryDocValuesField("payloads", payload));
-    }
-    writer.addDocument(doc);
+  public void add(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) throws IOException {
+    writer.addDocument(buildDocument(text, contexts, weight, payload));
   }
 
   /** Updates a previous suggestion, matching the exact same
@@ -307,7 +284,12 @@ public class AnalyzingInfixSuggester ext
    *  #add} instead.  After adding or updating a batch of
    *  new suggestions, you must call {@link #refresh} in the
    *  end in order to see the suggestions in {@link #lookup} */
-  public void update(BytesRef text, long weight, BytesRef payload) throws IOException {
+  public void update(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) throws IOException {
+    writer.updateDocument(new Term(EXACT_TEXT_FIELD_NAME, text.utf8ToString()),
+                          buildDocument(text, contexts, weight, payload));
+  }
+
+  private Document buildDocument(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) throws IOException {
     String textString = text.utf8ToString();
     Document doc = new Document();
     FieldType ft = getTextFieldType();
@@ -319,7 +301,15 @@ public class AnalyzingInfixSuggester ext
     if (payload != null) {
       doc.add(new BinaryDocValuesField("payloads", payload));
     }
-    writer.updateDocument(new Term(EXACT_TEXT_FIELD_NAME, textString), doc);
+    if (contexts != null) {
+      for(BytesRef context : contexts) {
+        // TODO: if we had a BinaryTermField we could fix
+        // this "must be valid utf8" limitation:
+        doc.add(new StringField(CONTEXTS_FIELD_NAME, context.utf8ToString(), Field.Store.NO));
+        doc.add(new SortedSetDocValuesField(CONTEXTS_FIELD_NAME, context));
+      }
+    }
+    return doc;
   }
 
   /** Reopens the underlying searcher; it's best to "batch
@@ -342,8 +332,13 @@ public class AnalyzingInfixSuggester ext
   }
 
   @Override
-  public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) throws IOException {
-    return lookup(key, num, true, true);
+  public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) throws IOException {
+    return lookup(key, contexts, num, true, true);
+  }
+
+  /** Lookup, without any context. */
+  public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+    return lookup(key, null, num, allTermsRequired, doHighlight);
   }
 
   /** This is called if the last token isn't ended
@@ -361,7 +356,7 @@ public class AnalyzingInfixSuggester ext
   /** Retrieve suggestions, specifying whether all terms
    *  must match ({@code allTermsRequired}) and whether the hits
    *  should be highlighted ({@code doHighlight}). */
-  public List<LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+  public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
 
     if (searcherMgr == null) {
       throw new IllegalStateException("suggester was not built");
@@ -424,6 +419,24 @@ public class AnalyzingInfixSuggester ext
           query.add(lastQuery, occur);
         }
       }
+
+      if (contexts != null) {
+        BooleanQuery sub = new BooleanQuery();
+        query.add(sub, BooleanClause.Occur.MUST);
+        for(BytesRef context : contexts) {
+          // NOTE: we "should" wrap this in
+          // ConstantScoreQuery, or maybe send this as a
+          // Filter instead to search, but since all of
+          // these are MUST'd, the change to the score won't
+          // affect the overall ranking.  Since we indexed
+          // as DOCS_ONLY, the perf should be the same
+          // either way (no freq int[] blocks to decode):
+
+          // TODO: if we had a BinaryTermField we could fix
+          // this "must be valid utf8" limitation:
+          sub.add(new TermQuery(new Term(CONTEXTS_FIELD_NAME, context.utf8ToString())), BooleanClause.Occur.SHOULD);
+        }
+      }
     } finally {
       IOUtils.closeWhileHandlingException(ts);
     }
@@ -432,8 +445,6 @@ public class AnalyzingInfixSuggester ext
     // weight w/ score.  Now we ignore score and sort only
     // by weight:
 
-    //System.out.println("INFIX query=" + query);
-
     Query finalQuery = finishQuery(query, allTermsRequired);
 
     //System.out.println("finalQuery=" + query);
@@ -480,6 +491,7 @@ public class AnalyzingInfixSuggester ext
     // This will just be null if app didn't pass payloads to build():
     // TODO: maybe just stored fields?  they compress...
     BinaryDocValues payloadsDV = MultiDocValues.getBinaryValues(searcher.getIndexReader(), "payloads");
+    List<AtomicReaderContext> leaves = searcher.getIndexReader().leaves();
     List<LookupResult> results = new ArrayList<>();
     BytesRef scratch = new BytesRef();
     for (int i=0;i<hits.scoreDocs.length;i++) {
@@ -496,13 +508,30 @@ public class AnalyzingInfixSuggester ext
         payload = null;
       }
 
+      // Must look up sorted-set by segment:
+      int segment = ReaderUtil.subIndex(fd.doc, leaves);
+      SortedSetDocValues contextsDV = leaves.get(segment).reader().getSortedSetDocValues(CONTEXTS_FIELD_NAME);
+      Set<BytesRef> contexts;
+      if (contextsDV != null) {
+        contexts = new HashSet<BytesRef>();
+        contextsDV.setDocument(fd.doc - leaves.get(segment).docBase);
+        long ord;
+        while ((ord = contextsDV.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
+          BytesRef context = new BytesRef();
+          contextsDV.lookupOrd(ord, context);
+          contexts.add(context);
+        }
+      } else {
+        contexts = null;
+      }
+
       LookupResult result;
 
       if (doHighlight) {
         Object highlightKey = highlight(text, matchedTokens, prefixToken);
-        result = new LookupResult(highlightKey.toString(), highlightKey, score, payload);
+        result = new LookupResult(highlightKey.toString(), highlightKey, score, payload, contexts);
       } else {
-        result = new LookupResult(text, score, payload);
+        result = new LookupResult(text, score, payload, contexts);
       }
 
       results.add(result);

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java Sun Mar 23 15:43:10 2014
@@ -380,6 +380,9 @@ public class AnalyzingSuggester extends 
 
   @Override
   public void build(InputIterator iterator) throws IOException {
+    if (iterator.hasContexts()) {
+      throw new IllegalArgumentException("this suggester doesn't support contexts");
+    }
     String prefix = getClass().getSimpleName();
     File directory = OfflineSorter.defaultTempDir();
     File tempInput = File.createTempFile(prefix, ".input", directory);
@@ -639,12 +642,15 @@ public class AnalyzingSuggester extends 
   }
 
   @Override
-  public List<LookupResult> lookup(final CharSequence key, boolean onlyMorePopular, int num) {
+  public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) {
     assert num > 0;
 
     if (onlyMorePopular) {
       throw new IllegalArgumentException("this suggester only works with onlyMorePopular=false");
     }
+    if (contexts != null) {
+      throw new IllegalArgumentException("this suggester doesn't support contexts");
+    }
     if (fst == null) {
       return Collections.emptyList();
     }

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggester.java Sun Mar 23 15:43:10 2014
@@ -50,6 +50,8 @@ import org.apache.lucene.util.Version;
  * the indexed text.
  * Please note that it increases the number of elements searched and applies the
  * ponderation after. It might be costly for long suggestions.
+ *
+ * @lucene.experimental
  */
 public class BlendedInfixSuggester extends AnalyzingInfixSuggester {
 
@@ -114,15 +116,15 @@ public class BlendedInfixSuggester exten
   }
 
   @Override
-  public List<Lookup.LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) throws IOException {
+  public List<Lookup.LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) throws IOException {
     // here we multiply the number of searched element by the defined factor
-    return super.lookup(key, onlyMorePopular, num * numFactor);
+    return super.lookup(key, contexts, onlyMorePopular, num * numFactor);
   }
 
   @Override
-  public List<Lookup.LookupResult> lookup(CharSequence key, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
+  public List<Lookup.LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, int num, boolean allTermsRequired, boolean doHighlight) throws IOException {
     // here we multiply the number of searched element by the defined factor
-    return super.lookup(key, num * numFactor, allTermsRequired, doHighlight);
+    return super.lookup(key, contexts, num * numFactor, allTermsRequired, doHighlight);
   }
 
   @Override

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java Sun Mar 23 15:43:10 2014
@@ -286,7 +286,10 @@ public class FreeTextSuggester extends L
    *  the weights for the suggestions are ignored. */
   public void build(InputIterator iterator, double ramBufferSizeMB) throws IOException {
     if (iterator.hasPayloads()) {
-      throw new IllegalArgumentException("payloads are not supported");
+      throw new IllegalArgumentException("this suggester doesn't support payloads");
+    }
+    if (iterator.hasContexts()) {
+      throw new IllegalArgumentException("this suggester doesn't support contexts");
     }
 
     String prefix = getClass().getSimpleName();
@@ -433,8 +436,18 @@ public class FreeTextSuggester extends L
 
   @Override
   public List<LookupResult> lookup(final CharSequence key, /* ignored */ boolean onlyMorePopular, int num) {
+    return lookup(key, null, onlyMorePopular, num);
+  }
+
+  /** Lookup, without any context. */
+  public List<LookupResult> lookup(final CharSequence key, int num) {
+    return lookup(key, null, true, num);
+  }
+
+  @Override
+  public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, /* ignored */ boolean onlyMorePopular, int num) {
     try {
-      return lookup(key, num);
+      return lookup(key, contexts, num);
     } catch (IOException ioe) {
       // bogus:
       throw new RuntimeException(ioe);
@@ -458,7 +471,11 @@ public class FreeTextSuggester extends L
   }
 
   /** Retrieve suggestions. */
-  public List<LookupResult> lookup(final CharSequence key, int num) throws IOException {
+  public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, int num) throws IOException {
+    if (contexts != null) {
+      throw new IllegalArgumentException("this suggester doesn't support contexts");
+    }
+
     TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
     try {
       TermToBytesRefAttribute termBytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java Sun Mar 23 15:43:10 2014
@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.search.suggest.InputIterator;
 import org.apache.lucene.search.suggest.Lookup;
@@ -30,16 +31,16 @@ import org.apache.lucene.store.ByteArray
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.util.fst.FST;
-import org.apache.lucene.util.fst.NoOutputs;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.OfflineSorter;
 import org.apache.lucene.util.OfflineSorter.SortInfo;
+import org.apache.lucene.util.OfflineSorter;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.UnicodeUtil;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.NoOutputs;
 
 /**
  * An adapter from {@link Lookup} API to {@link FSTCompletion}.
@@ -150,6 +151,9 @@ public class FSTCompletionLookup extends
     if (iterator.hasPayloads()) {
       throw new IllegalArgumentException("this suggester doesn't support payloads");
     }
+    if (iterator.hasContexts()) {
+      throw new IllegalArgumentException("this suggester doesn't support contexts");
+    }
     File tempInput = File.createTempFile(
         FSTCompletionLookup.class.getSimpleName(), ".input", OfflineSorter.defaultTempDir());
     File tempSorted = File.createTempFile(
@@ -243,7 +247,10 @@ public class FSTCompletionLookup extends
   }
 
   @Override
-  public List<LookupResult> lookup(CharSequence key, boolean higherWeightsFirst, int num) {
+  public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, boolean higherWeightsFirst, int num) {
+    if (contexts != null) {
+      throw new IllegalArgumentException("this suggester doesn't support contexts");
+    }
     final List<Completion> completions;
     if (higherWeightsFirst) {
       completions = higherWeightsCompletion.lookup(key, num);

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java Sun Mar 23 15:43:10 2014
@@ -17,6 +17,13 @@ package org.apache.lucene.search.suggest
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Set;
+
 import org.apache.lucene.search.suggest.InputIterator;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.SortedInputIterator;
@@ -31,20 +38,14 @@ import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.fst.Builder;
-import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.FST.Arc;
 import org.apache.lucene.util.fst.FST.BytesReader;
+import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.PositiveIntOutputs;
 import org.apache.lucene.util.fst.Util.Result;
 import org.apache.lucene.util.fst.Util.TopResults;
 import org.apache.lucene.util.fst.Util;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-
 /**
  * Suggester based on a weighted FST: it first traverses the prefix, 
  * then walks the <i>n</i> shortest paths to retrieve top-ranked
@@ -97,6 +98,9 @@ public class WFSTCompletionLookup extend
     if (iterator.hasPayloads()) {
       throw new IllegalArgumentException("this suggester doesn't support payloads");
     }
+    if (iterator.hasContexts()) {
+      throw new IllegalArgumentException("this suggester doesn't support contexts");
+    }
     count = 0;
     BytesRef scratch = new BytesRef();
     InputIterator iter = new WFSTInputIterator(iterator);
@@ -140,7 +144,10 @@ public class WFSTCompletionLookup extend
   }
 
   @Override
-  public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
+  public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) {
+    if (contexts != null) {
+      throw new IllegalArgumentException("this suggester doesn't support contexts");
+    }
     assert num > 0;
 
     if (onlyMorePopular) {
@@ -260,7 +267,7 @@ public class WFSTCompletionLookup extend
     }
 
     @Override
-    protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException {
+    protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, Set<BytesRef> contexts, long weight) throws IOException {
       if (spare.length + 4 >= buffer.length) {
         buffer = ArrayUtil.grow(buffer, spare.length + 4);
       }

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java Sun Mar 23 15:43:10 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.search.suggest
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.search.suggest.InputIterator;
 import org.apache.lucene.search.suggest.Lookup;
@@ -29,7 +30,6 @@ import org.apache.lucene.store.DataInput
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
-import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.UnicodeUtil;
 
 /**
@@ -62,6 +62,10 @@ public class JaspellLookup extends Looku
       // WTF - this could result in yet another sorted iteration....
       tfit = new UnsortedInputIterator(tfit);
     }
+    if (tfit.hasContexts()) {
+      throw new IllegalArgumentException("this suggester doesn't support contexts");
+    }
+    count = 0;
     trie = new JaspellTernarySearchTrie();
     trie.setMatchAlmostDiff(editDistance);
     BytesRef spare;
@@ -99,7 +103,10 @@ public class JaspellLookup extends Looku
   }
 
   @Override
-  public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
+  public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) {
+    if (contexts != null) {
+      throw new IllegalArgumentException("this suggester doesn't support contexts");
+    }
     List<LookupResult> res = new ArrayList<>();
     List<String> list;
     int count = onlyMorePopular ? num * 2 : num;

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java Sun Mar 23 15:43:10 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.search.suggest
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.search.suggest.InputIterator;
 import org.apache.lucene.search.suggest.Lookup;
@@ -55,6 +56,9 @@ public class TSTLookup extends Lookup {
     if (tfit.hasPayloads()) {
       throw new IllegalArgumentException("this suggester doesn't support payloads");
     }
+    if (tfit.hasContexts()) {
+      throw new IllegalArgumentException("this suggester doesn't support contexts");
+    }
     root = new TernaryTreeNode();
     // buffer first
     if (tfit.getComparator() != BytesRef.getUTF8SortedAsUTF16Comparator()) {
@@ -118,7 +122,10 @@ public class TSTLookup extends Lookup {
   }
 
   @Override
-  public List<LookupResult> lookup(CharSequence key, boolean onlyMorePopular, int num) {
+  public List<LookupResult> lookup(CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) {
+    if (contexts != null) {
+      throw new IllegalArgumentException("this suggester doesn't support contexts");
+    }
     List<TernaryTreeNode> list = autocomplete.prefixCompletion(root, key, 0);
     List<LookupResult> res = new ArrayList<>();
     if (list == null || list.size() == 0) {

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java Sun Mar 23 15:43:10 2014
@@ -4,9 +4,11 @@ import java.io.IOException;
 import java.util.AbstractMap.SimpleEntry;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
+import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
@@ -50,9 +52,10 @@ public class DocumentDictionaryTest exte
   static final String FIELD_NAME = "f1";
   static final String WEIGHT_FIELD_NAME = "w1";
   static final String PAYLOAD_FIELD_NAME = "p1";
+  static final String CONTEXT_FIELD_NAME = "c1";
   
   /** Returns Pair(list of invalid document terms, Map of document term -> document) */
-  private Map.Entry<List<String>, Map<String, Document>> generateIndexDocuments(int ndocs, boolean requiresPayload) {
+  private Map.Entry<List<String>, Map<String, Document>> generateIndexDocuments(int ndocs, boolean requiresPayload, boolean requiresContexts) {
     Map<String, Document> docs = new HashMap<>();
     List<String> invalidDocTerms = new ArrayList<>();
     for(int i = 0; i < ndocs ; i++) {
@@ -78,6 +81,15 @@ public class DocumentDictionaryTest exte
         }
       }
       
+      if (requiresContexts || usually()) {
+        if (usually()) {
+          for (int j = 0; j < atLeast(2); j++) {
+            doc.add(new StoredField(CONTEXT_FIELD_NAME, new BytesRef("context_" + i + "_"+ j)));
+          }
+        }
+        // we should allow entries without context
+      }
+      
       // usually have valid weight field in document
       if (usually()) {
         Field weight = (rarely()) ? 
@@ -126,7 +138,7 @@ public class DocumentDictionaryTest exte
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true, false);
     Map<String, Document> docs = res.getValue();
     List<String> invalidDocTerms = res.getKey();
     for(Document doc: docs.values()) {
@@ -161,7 +173,7 @@ public class DocumentDictionaryTest exte
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false, false);
     Map<String, Document> docs = res.getValue();
     List<String> invalidDocTerms = res.getKey();
     for(Document doc: docs.values()) {
@@ -192,12 +204,53 @@ public class DocumentDictionaryTest exte
   }
   
   @Test
+  public void testWithContexts() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true, true);
+    Map<String, Document> docs = res.getValue();
+    List<String> invalidDocTerms = res.getKey();
+    for(Document doc: docs.values()) {
+      writer.addDocument(doc);
+    }
+    writer.commit();
+    writer.close();
+    IndexReader ir = DirectoryReader.open(dir);
+    Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
+    InputIterator inputIterator = dictionary.getEntryIterator();
+    BytesRef f;
+    while((f = inputIterator.next())!=null) {
+      Document doc = docs.remove(f.utf8ToString());
+      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
+      IndexableField weightField = doc.getField(WEIGHT_FIELD_NAME);
+      assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0);
+      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      Set<BytesRef> oriCtxs = new HashSet<>();
+      Set<BytesRef> contextSet = inputIterator.contexts();
+      for (IndexableField ctxf : doc.getFields(CONTEXT_FIELD_NAME)) {
+        oriCtxs.add(ctxf.binaryValue());
+      }
+      assertEquals(oriCtxs.size(), contextSet.size());
+    }
+    
+    for (String invalidTerm : invalidDocTerms) {
+      assertNotNull(docs.remove(invalidTerm));
+    }
+    assertTrue(docs.isEmpty());
+    
+    ir.close();
+    dir.close();
+  }
+  
+  @Test
   public void testWithDeletions() throws IOException {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
     iwc.setMergePolicy(newLogMergePolicy());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
-    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false);
+    Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false, false);
     Map<String, Document> docs = res.getValue();
     List<String> invalidDocTerms = res.getKey();
     Random rand = random();

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java Sun Mar 23 15:43:10 2014
@@ -20,9 +20,11 @@ package org.apache.lucene.search.suggest
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
+import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
@@ -33,6 +35,7 @@ import org.apache.lucene.document.TextFi
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.function.ValueSource;
@@ -42,8 +45,8 @@ import org.apache.lucene.queries.functio
 import org.apache.lucene.search.spell.Dictionary;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util.LuceneTestCase;
 import org.junit.Test;
 
 @SuppressCodecs("Lucene3x")
@@ -54,6 +57,7 @@ public class DocumentValueSourceDictiona
   static final String WEIGHT_FIELD_NAME_2 = "w2";
   static final String WEIGHT_FIELD_NAME_3 = "w3";
   static final String PAYLOAD_FIELD_NAME = "p1";
+  static final String CONTEXTS_FIELD_NAME = "c1";
 
   private Map<String, Document> generateIndexDocuments(int ndocs) {
     Map<String, Document> docs = new HashMap<>();
@@ -63,12 +67,18 @@ public class DocumentValueSourceDictiona
       Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
       Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
       Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
+      Field contexts = new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_"  + i + "_0"));
       Document doc = new Document();
       doc.add(field);
       doc.add(payload);
       doc.add(weight1);
       doc.add(weight2);
       doc.add(weight3);
+      doc.add(contexts);
+      for(int j = 1; j < atLeast(3); j++) {
+        contexts.setBytesValue(new BytesRef("ctx_" + i + "_" + j));
+        doc.add(contexts);
+      }
       docs.put(field.stringValue(), doc);
     }
     return docs;
@@ -126,6 +136,43 @@ public class DocumentValueSourceDictiona
     ir.close();
     dir.close();
   }
+  
+  @Test
+  public void testWithContext() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+    Map<String, Document> docs = generateIndexDocuments(atLeast(100));
+    for(Document doc: docs.values()) {
+      writer.addDocument(doc);
+    }
+    writer.commit();
+    writer.close();
+
+    IndexReader ir = DirectoryReader.open(dir);
+    ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)};
+    Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME);
+    InputIterator inputIterator = dictionary.getEntryIterator();
+    BytesRef f;
+    while((f = inputIterator.next())!=null) {
+      Document doc = docs.remove(f.utf8ToString());
+      long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue();
+      long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue();
+      long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue();
+      assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME))));
+      assertEquals(inputIterator.weight(), (w1 + w2 + w3));
+      assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue()));
+      Set<BytesRef> originalCtxs = new HashSet<>();
+      for (IndexableField ctxf: doc.getFields(CONTEXTS_FIELD_NAME)) {
+        originalCtxs.add(ctxf.binaryValue());
+      }
+      assertEquals(originalCtxs, inputIterator.contexts());
+    }
+    assertTrue(docs.isEmpty());
+    ir.close();
+    dir.close();
+  }
 
   @Test
   public void testWithoutPayload() throws IOException {

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java Sun Mar 23 15:43:10 2014
@@ -17,6 +17,8 @@ package org.apache.lucene.search.suggest
  * limitations under the License.
  */
 
+import java.util.Set;
+
 import org.apache.lucene.util.BytesRef;
 
 /** corresponds to {@link InputIterator}'s entries */
@@ -25,28 +27,55 @@ public final class Input {
   public final long v;
   public final BytesRef payload;
   public final boolean hasPayloads;
+  public final Set<BytesRef> contexts;
+  public final boolean hasContexts;
 
   public Input(BytesRef term, long v, BytesRef payload) {
-    this(term, v, payload, true);
+    this(term, v, payload, true, null, false);
   }
   
   public Input(String term, long v, BytesRef payload) {
-    this(new BytesRef(term), v, payload, true);
+    this(new BytesRef(term), v, payload);
+  }
+  
+  public Input(BytesRef term, long v, Set<BytesRef> contexts) {
+    this(term, v, null, false, contexts, true);
+  }
+  
+  public Input(String term, long v, Set<BytesRef> contexts) {
+    this(new BytesRef(term), v, null, false, contexts, true);
   }
   
   public Input(BytesRef term, long v) {
-    this(term, v, null, false);
+    this(term, v, null, false, null, false);
   }
   
   public Input(String term, long v) {
-    this(new BytesRef(term), v, null, false);
+    this(new BytesRef(term), v, null, false, null, false);
   }
   
-  public Input(BytesRef term, long v, BytesRef payload, boolean hasPayloads) {
+  public Input(String term, int v, BytesRef payload, Set<BytesRef> contexts) {
+    this(new BytesRef(term), v, payload, true, contexts, true);
+  }
+
+  public Input(BytesRef term, long v, BytesRef payload, Set<BytesRef> contexts) {
+    this(term, v, payload, true, contexts, true);
+  }
+  
+
+  
+  public Input(BytesRef term, long v, BytesRef payload, boolean hasPayloads, Set<BytesRef> contexts, 
+      boolean hasContexts) {
     this.term = term;
     this.v = v;
     this.payload = payload;
     this.hasPayloads = hasPayloads;
+    this.contexts = contexts;
+    this.hasContexts = hasContexts;
+  }
+  
+  public boolean hasContexts() {
+    return hasContexts;
   }
   
   public boolean hasPayloads() {

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java Sun Mar 23 15:43:10 2014
@@ -20,6 +20,7 @@ package org.apache.lucene.search.suggest
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.Iterator;
+import java.util.Set;
 
 import org.apache.lucene.util.BytesRef;
 
@@ -29,6 +30,7 @@ import org.apache.lucene.util.BytesRef;
 public final class InputArrayIterator implements InputIterator {
   private final Iterator<Input> i;
   private final boolean hasPayloads;
+  private final boolean hasContexts;
   private boolean first;
   private Input current;
   private final BytesRef spare = new BytesRef();
@@ -39,8 +41,10 @@ public final class InputArrayIterator im
       current = i.next();
       first = true;
       this.hasPayloads = current.hasPayloads;
+      this.hasContexts = current.hasContexts;
     } else {
       this.hasPayloads = false;
+      this.hasContexts = false;
     }
   }
 
@@ -84,4 +88,14 @@ public final class InputArrayIterator im
   public Comparator<BytesRef> getComparator() {
     return null;
   }
+
+  @Override
+  public Set<BytesRef> contexts() {
+    return current.contexts;
+  }
+
+  @Override
+  public boolean hasContexts() {
+    return hasContexts;
+  }
 }
\ No newline at end of file

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java?rev=1580517&r1=1580516&r2=1580517&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java Sun Mar 23 15:43:10 2014
@@ -19,9 +19,11 @@ package org.apache.lucene.search.suggest
 
 import java.util.AbstractMap.SimpleEntry;
 import java.util.Comparator;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.Random;
+import java.util.Set;
 import java.util.TreeMap;
 
 import org.apache.lucene.util.BytesRef;
@@ -45,21 +47,29 @@ public class TestInputIterator extends L
     Comparator<BytesRef> comparator = random.nextBoolean() ? BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator();
     TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> sorted = new TreeMap<>(comparator);
     TreeMap<BytesRef, Long> sortedWithoutPayload = new TreeMap<>(comparator);
+    TreeMap<BytesRef, SimpleEntry<Long, Set<BytesRef>>> sortedWithContext = new TreeMap<>(comparator);
     Input[] unsorted = new Input[num];
     Input[] unsortedWithoutPayload = new Input[num];
-
+    Input[] unsortedWithContexts = new Input[num];
+    Set<BytesRef> ctxs;
     for (int i = 0; i < num; i++) {
       BytesRef key;
       BytesRef payload;
+      ctxs = new HashSet<>();
       do {
         key = new BytesRef(TestUtil.randomUnicodeString(random));
         payload = new BytesRef(TestUtil.randomUnicodeString(random));
+        for(int j = 0; j < atLeast(2); j++) {
+          ctxs.add(new BytesRef(TestUtil.randomUnicodeString(random)));
+        }
       } while (sorted.containsKey(key));
       long value = random.nextLong();
       sortedWithoutPayload.put(key, value);
       sorted.put(key, new SimpleEntry<>(value, payload));
+      sortedWithContext.put(key, new SimpleEntry<>(value, ctxs));
       unsorted[i] = new Input(key, value, payload);
       unsortedWithoutPayload[i] = new Input(key, value);
+      unsortedWithContexts[i] = new Input(key, value, ctxs);
     }
     
     // test the sorted iterator wrapper with payloads
@@ -74,6 +84,18 @@ public class TestInputIterator extends L
     }
     assertNull(wrapper.next());
     
+    // test the sorted iterator wrapper with contexts
+    wrapper = new SortedInputIterator(new InputArrayIterator(unsortedWithContexts), comparator);
+    Iterator<Map.Entry<BytesRef, SimpleEntry<Long, Set<BytesRef>>>> actualEntries = sortedWithContext.entrySet().iterator();
+    while (actualEntries.hasNext()) {
+      Map.Entry<BytesRef, SimpleEntry<Long, Set<BytesRef>>> entry = actualEntries.next();
+      assertEquals(entry.getKey(), wrapper.next());
+      assertEquals(entry.getValue().getKey().longValue(), wrapper.weight());
+      Set<BytesRef> actualCtxs = entry.getValue().getValue();
+      assertEquals(actualCtxs, wrapper.contexts());
+    }
+    assertNull(wrapper.next());
+    
     // test the unsorted iterator wrapper with payloads
     wrapper = new UnsortedInputIterator(new InputArrayIterator(unsorted));
     TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> actual = new TreeMap<>();