You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/11/21 11:13:42 UTC
svn commit: r1640887 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/suggest/ lucene/suggest/src/java/org/apache/lucene/search/suggest/
lucene/suggest/src/test/org/apache/lucene/search/suggest/
Author: mikemccand
Date: Fri Nov 21 10:13:41 2014
New Revision: 1640887
URL: http://svn.apache.org/r1640887
Log:
LUCENE-5833: allow suggesters to build off of each value from multi-valued fields
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/suggest/ (props changed)
lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1640887&r1=1640886&r2=1640887&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Fri Nov 21 10:13:41 2014
@@ -88,6 +88,11 @@ New Features
stalls incoming threads when merges are falling behind (Mike
McCandless)
+* LUCENE-5833: DocumentDictionary now enumerates each value separately
+ in a multi-valued field (not just the first value), so you can build
+ suggesters from multi-valued fields. (Varun Thacker via Mike
+ McCandless)
+
API Changes
* LUCENE-5900: Deprecated more constructors taking Version in *InfixSuggester and
Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java?rev=1640887&r1=1640886&r2=1640887&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java Fri Nov 21 10:13:41 2014
@@ -16,6 +16,7 @@ package org.apache.lucene.search.suggest
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
@@ -30,6 +31,8 @@ import org.apache.lucene.search.spell.Di
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
+
+
/**
* <p>
* Dictionary with terms, weights, payload (optional) and contexts (optional)
@@ -115,6 +118,8 @@ public class DocumentDictionary implemen
private BytesRef currentPayload = null;
private Set<BytesRef> currentContexts;
private final NumericDocValues weightValues;
+ IndexableField[] currentDocFields = new IndexableField[0];
+ int nextFieldsPosition = 0;
/**
* Creates an iterator over term, weight and payload fields from the lucene
@@ -137,7 +142,24 @@ public class DocumentDictionary implemen
@Override
public BytesRef next() throws IOException {
- while (currentDocId < docCount) {
+ while (true) {
+ if (nextFieldsPosition < currentDocFields.length) {
+ // Still values left from the document
+ IndexableField fieldValue = currentDocFields[nextFieldsPosition++];
+ if (fieldValue.binaryValue() != null) {
+ return fieldValue.binaryValue();
+ } else if (fieldValue.stringValue() != null) {
+ return new BytesRef(fieldValue.stringValue());
+ } else {
+ continue;
+ }
+ }
+
+ if (currentDocId == docCount) {
+ // Iterated over all the documents.
+ break;
+ }
+
currentDocId++;
if (liveDocs != null && !liveDocs.get(currentDocId)) {
continue;
@@ -145,34 +167,51 @@ public class DocumentDictionary implemen
Document doc = reader.document(currentDocId, relevantFields);
- BytesRef tempPayload = null;
- BytesRef tempTerm = null;
Set<BytesRef> tempContexts = new HashSet<>();
+ BytesRef tempPayload;
if (hasPayloads) {
IndexableField payload = doc.getField(payloadField);
- if (payload == null || (payload.binaryValue() == null && payload.stringValue() == null)) {
+ if (payload == null) {
+ continue;
+ } else if (payload.binaryValue() != null) {
+ tempPayload = payload.binaryValue();
+ } else if (payload.stringValue() != null) {
+ tempPayload = new BytesRef(payload.stringValue());
+ } else {
continue;
}
- tempPayload = (payload.binaryValue() != null) ? payload.binaryValue() : new BytesRef(payload.stringValue());
+ } else {
+ tempPayload = null;
}
if (hasContexts) {
final IndexableField[] contextFields = doc.getFields(contextsField);
for (IndexableField contextField : contextFields) {
- if (contextField.binaryValue() == null && contextField.stringValue() == null) {
- continue;
+ if (contextField.binaryValue() != null) {
+ tempContexts.add(contextField.binaryValue());
+ } else if (contextField.stringValue() != null) {
+ tempContexts.add(new BytesRef(contextField.stringValue()));
} else {
- tempContexts.add((contextField.binaryValue() != null) ? contextField.binaryValue() : new BytesRef(contextField.stringValue()));
+ continue;
}
}
}
- IndexableField fieldVal = doc.getField(field);
- if (fieldVal == null || (fieldVal.binaryValue() == null && fieldVal.stringValue() == null)) {
+ currentDocFields = doc.getFields(field);
+ nextFieldsPosition = 0;
+ if (currentDocFields.length == 0) { // no values in this document
+ continue;
+ }
+ IndexableField fieldValue = currentDocFields[nextFieldsPosition++];
+ BytesRef tempTerm;
+ if (fieldValue.binaryValue() != null) {
+ tempTerm = fieldValue.binaryValue();
+ } else if (fieldValue.stringValue() != null) {
+ tempTerm = new BytesRef(fieldValue.stringValue());
+ } else {
continue;
}
- tempTerm = (fieldVal.stringValue() != null) ? new BytesRef(fieldVal.stringValue()) : fieldVal.binaryValue();
currentPayload = tempPayload;
currentContexts = tempContexts;
@@ -180,6 +219,7 @@ public class DocumentDictionary implemen
return tempTerm;
}
+
return null;
}
Modified: lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java?rev=1640887&r1=1640886&r2=1640887&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java (original)
+++ lucene/dev/branches/branch_5x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java Fri Nov 21 10:13:41 2014
@@ -5,6 +5,7 @@ import java.util.AbstractMap.SimpleEntry
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
@@ -298,4 +299,89 @@ public class DocumentDictionaryTest exte
ir.close();
dir.close();
}
+
+ @Test
+ public void testMultiValuedField() throws IOException {
+ Directory dir = newDirectory();
+ IndexWriterConfig iwc = newIndexWriterConfig(random(), new MockAnalyzer(random()));
+ iwc.setMergePolicy(newLogMergePolicy());
+ RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
+
+ List<Suggestion> suggestions = indexMultiValuedDocuments(atLeast(1000), writer);
+ writer.commit();
+ writer.close();
+
+ IndexReader ir = DirectoryReader.open(dir);
+ Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
+ InputIterator inputIterator = dictionary.getEntryIterator();
+ BytesRef f;
+ Iterator<Suggestion> suggestionsIter = suggestions.iterator();
+ while((f = inputIterator.next())!=null) {
+ Suggestion nextSuggestion = suggestionsIter.next();
+ assertTrue(f.equals(nextSuggestion.term));
+ long weight = nextSuggestion.weight;
+ assertEquals(inputIterator.weight(), (weight != -1) ? weight : 0);
+ assertTrue(inputIterator.payload().equals(nextSuggestion.payload));
+ assertTrue(inputIterator.contexts().equals(nextSuggestion.contexts));
+ }
+ assertFalse(suggestionsIter.hasNext());
+ ir.close();
+ dir.close();
+ }
+
+ private List<Suggestion> indexMultiValuedDocuments(int numDocs, RandomIndexWriter writer) throws IOException {
+ List<Suggestion> suggestionList = new ArrayList<>(numDocs);
+
+ for(int i=0; i<numDocs; i++) {
+ Document doc = new Document();
+ Field field;
+ BytesRef payloadValue;
+ Set<BytesRef> contextValues = new HashSet<>();
+ long numericValue = -1; //-1 for missing weight
+ BytesRef term;
+
+ payloadValue = new BytesRef("payload_" + i);
+ field = new StoredField(PAYLOAD_FIELD_NAME, payloadValue);
+ doc.add(field);
+
+ if (usually()) {
+ numericValue = 100 + i;
+ field = new NumericDocValuesField(WEIGHT_FIELD_NAME, numericValue);
+ doc.add(field);
+ }
+
+ int numContexts = atLeast(1);
+ for (int j=0; j<numContexts; j++) {
+ BytesRef contextValue = new BytesRef("context_" + i + "_" + j);
+ field = new StoredField(CONTEXT_FIELD_NAME, contextValue);
+ doc.add(field);
+ contextValues.add(contextValue);
+ }
+
+ int numSuggestions = atLeast(2);
+ for (int j=0; j<numSuggestions; j++) {
+ term = new BytesRef("field_" + i + "_" + j);
+ field = new StoredField(FIELD_NAME, term);
+ doc.add(field);
+
+ Suggestion suggestionValue = new Suggestion();
+ suggestionValue.payload = payloadValue;
+ suggestionValue.contexts = contextValues;
+ suggestionValue.weight = numericValue;
+ suggestionValue.term = term;
+ suggestionList.add(suggestionValue);
+ }
+ writer.addDocument(doc);
+ }
+ return suggestionList;
+ }
+
+ private class Suggestion {
+ private long weight;
+ private BytesRef payload;
+ private Set<BytesRef> contexts;
+ private BytesRef term;
+ }
+
+
}