You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/11/21 11:13:42 UTC

svn commit: r1640887 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/suggest/ lucene/suggest/src/java/org/apache/lucene/search/suggest/ lucene/suggest/src/test/org/apache/lucene/search/suggest/

Author: mikemccand
Date: Fri Nov 21 10:13:41 2014
New Revision: 1640887

URL: http://svn.apache.org/r1640887
Log:
LUCENE-5833: allow suggesters to build off of each value from multi-valued fields

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
    lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1640887&r1=1640886&r2=1640887&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Fri Nov 21 10:13:41 2014
@@ -88,6 +88,11 @@ New Features
   stalls incoming threads when merges are falling behind (Mike
   McCandless)
 
+* LUCENE-5833: DocumentDictionary now enumerates each value separately
+  in a multi-valued field (not just the first value), so you can build
+  suggesters from multi-valued fields.  (Varun Thacker via Mike
+  McCandless)
+
 API Changes
 
 * LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java?rev=1640887&r1=1640886&r2=1640887&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java Fri Nov 21 10:13:41 2014
@@ -16,6 +16,7 @@ package org.apache.lucene.search.suggest
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
@@ -30,6 +31,8 @@ import org.apache.lucene.search.spell.Di
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 
+
+
 /**
  * <p>
  * Dictionary with terms, weights, payload (optional) and contexts (optional)
@@ -115,6 +118,8 @@ public class DocumentDictionary implemen
     private BytesRef currentPayload = null;
     private Set<BytesRef> currentContexts;
     private final NumericDocValues weightValues;
+    IndexableField[] currentDocFields = new IndexableField[0];
+    int nextFieldsPosition = 0;
 
     /**
      * Creates an iterator over term, weight and payload fields from the lucene
@@ -137,7 +142,24 @@ public class DocumentDictionary implemen
 
     @Override
     public BytesRef next() throws IOException {
-      while (currentDocId < docCount) {
+      while (true) {
+        if (nextFieldsPosition < currentDocFields.length) {
+          // Still values left from the document
+          IndexableField fieldValue = currentDocFields[nextFieldsPosition++];
+          if (fieldValue.binaryValue() != null) {
+            return fieldValue.binaryValue();
+          } else if (fieldValue.stringValue() != null) {
+            return new BytesRef(fieldValue.stringValue());
+          } else {
+            continue;
+          }
+        }
+
+        if (currentDocId == docCount) {
+          // Iterated over all the documents.
+          break;
+        }
+
         currentDocId++;
         if (liveDocs != null && !liveDocs.get(currentDocId)) { 
           continue;
@@ -145,34 +167,51 @@ public class DocumentDictionary implemen
 
         Document doc = reader.document(currentDocId, relevantFields);
 
-        BytesRef tempPayload = null;
-        BytesRef tempTerm = null;
         Set<BytesRef> tempContexts = new HashSet<>();
 
+        BytesRef tempPayload;
         if (hasPayloads) {
           IndexableField payload = doc.getField(payloadField);
-          if (payload == null || (payload.binaryValue() == null && payload.stringValue() == null)) {
+          if (payload == null) {
+            continue;
+          } else if (payload.binaryValue() != null) {
+            tempPayload =  payload.binaryValue();
+          } else if (payload.stringValue() != null) {
+            tempPayload = new BytesRef(payload.stringValue());
+          } else {
             continue;
           }
-          tempPayload = (payload.binaryValue() != null) ? payload.binaryValue() : new BytesRef(payload.stringValue());
+        } else {
+          tempPayload = null;
         }
 
         if (hasContexts) {
           final IndexableField[] contextFields = doc.getFields(contextsField);
           for (IndexableField contextField : contextFields) {
-            if (contextField.binaryValue() == null && contextField.stringValue() == null) {
-              continue;
+            if (contextField.binaryValue() != null) {
+              tempContexts.add(contextField.binaryValue());
+            } else if (contextField.stringValue() != null) {
+              tempContexts.add(new BytesRef(contextField.stringValue()));
             } else {
-              tempContexts.add((contextField.binaryValue() != null) ? contextField.binaryValue() : new BytesRef(contextField.stringValue()));
+              continue;
             }
           }
         }
 
-        IndexableField fieldVal = doc.getField(field);
-        if (fieldVal == null || (fieldVal.binaryValue() == null && fieldVal.stringValue() == null)) {
+        currentDocFields = doc.getFields(field);
+        nextFieldsPosition = 0;
+        if (currentDocFields.length == 0) { // no values in this document
+          continue;
+        }
+        IndexableField fieldValue = currentDocFields[nextFieldsPosition++];
+        BytesRef tempTerm;
+        if (fieldValue.binaryValue() != null) {
+          tempTerm = fieldValue.binaryValue();
+        } else if (fieldValue.stringValue() != null) {
+          tempTerm = new BytesRef(fieldValue.stringValue());
+        } else {
           continue;
         }
-        tempTerm = (fieldVal.stringValue() != null) ? new BytesRef(fieldVal.stringValue()) : fieldVal.binaryValue();
 
         currentPayload = tempPayload;
         currentContexts = tempContexts;
@@ -180,6 +219,7 @@ public class DocumentDictionary implemen
 
         return tempTerm;
       }
+
       return null;
     }
 

Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java?rev=1640887&r1=1640886&r2=1640887&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java Fri Nov 21 10:13:41 2014
@@ -5,6 +5,7 @@ import java.util.AbstractMap.SimpleEntry
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
@@ -298,4 +299,89 @@ public class DocumentDictionaryTest exte
     ir.close();
     dir.close();
   }
+
+  @Test
+  public void testMultiValuedField() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = newIndexWriterConfig(random(), new MockAnalyzer(random()));
+    iwc.setMergePolicy(newLogMergePolicy());
+    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+
+    List<Suggestion> suggestions = indexMultiValuedDocuments(atLeast(1000), writer);
+    writer.commit();
+    writer.close();
+
+    IndexReader ir = DirectoryReader.open(dir);
+    Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
+    InputIterator inputIterator = dictionary.getEntryIterator();
+    BytesRef f;
+    Iterator<Suggestion> suggestionsIter = suggestions.iterator();
+    while((f = inputIterator.next())!=null) {
+      Suggestion nextSuggestion = suggestionsIter.next();
+      assertTrue(f.equals(nextSuggestion.term));
+      long weight = nextSuggestion.weight;
+      assertEquals(inputIterator.weight(), (weight != -1) ? weight : 0);
+      assertTrue(inputIterator.payload().equals(nextSuggestion.payload));
+      assertTrue(inputIterator.contexts().equals(nextSuggestion.contexts));
+    }
+    assertFalse(suggestionsIter.hasNext());
+    ir.close();
+    dir.close();
+  }
+
+  private List<Suggestion> indexMultiValuedDocuments(int numDocs, RandomIndexWriter writer) throws IOException {
+    List<Suggestion> suggestionList = new ArrayList<>(numDocs);
+
+    for(int i=0; i<numDocs; i++) {
+      Document doc = new Document();
+      Field field;
+      BytesRef payloadValue;
+      Set<BytesRef> contextValues = new HashSet<>();
+      long numericValue = -1; //-1 for missing weight
+      BytesRef term;
+
+      payloadValue = new BytesRef("payload_" + i);
+      field = new StoredField(PAYLOAD_FIELD_NAME, payloadValue);
+      doc.add(field);
+
+      if (usually()) {
+        numericValue = 100 + i;
+        field = new NumericDocValuesField(WEIGHT_FIELD_NAME, numericValue);
+        doc.add(field);
+      }
+
+      int numContexts = atLeast(1);
+      for (int j=0; j<numContexts; j++) {
+        BytesRef contextValue = new BytesRef("context_" + i + "_" + j);
+        field = new StoredField(CONTEXT_FIELD_NAME, contextValue);
+        doc.add(field);
+        contextValues.add(contextValue);
+      }
+
+      int numSuggestions = atLeast(2);
+      for (int j=0; j<numSuggestions; j++) {
+        term = new BytesRef("field_" + i + "_" + j);
+        field = new StoredField(FIELD_NAME, term);
+        doc.add(field);
+
+        Suggestion suggestionValue = new Suggestion();
+        suggestionValue.payload = payloadValue;
+        suggestionValue.contexts = contextValues;
+        suggestionValue.weight = numericValue;
+        suggestionValue.term = term;
+        suggestionList.add(suggestionValue);
+      }
+      writer.addDocument(doc);
+    }
+    return suggestionList;
+  }
+
+  private class Suggestion {
+    private long weight;
+    private BytesRef payload;
+    private Set<BytesRef> contexts;
+    private BytesRef term;
+  }
+
+
 }