You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/10/19 21:04:34 UTC
svn commit: r1533809 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/suggest/ lucene/suggest/src/java/org/apache/lucene/search/suggest/
lucene/suggest/src/test/org/apache/lucene/search/suggest/
Author: mikemccand
Date: Sat Oct 19 19:04:34 2013
New Revision: 1533809
URL: http://svn.apache.org/r1533809
Log:
LUCENE-5294: add DocumentExpressionDictionary, to compute each suggestion's weight using a javascript expression
Added:
lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java
- copied unchanged from r1533808, lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java
lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java
- copied, changed from r1533808, lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/suggest/ (props changed)
lucene/dev/branches/branch_4x/lucene/suggest/build.xml
lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1533809&r1=1533808&r2=1533809&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Sat Oct 19 19:04:34 2013
@@ -55,6 +55,10 @@ New Features
* LUCENE-5269: Add CodepointCountFilter. (Robert Muir)
+* LUCENE-5294: Suggest module: add DocumentExpressionDictionary to
+ compute each suggestion's weight using a javascript expression.
+ (Areek Zillur via Mike McCandless)
+
Bug Fixes
* LUCENE-4998: Fixed a few places to pass IOContext.READONCE instead
Modified: lucene/dev/branches/branch_4x/lucene/suggest/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/build.xml?rev=1533809&r1=1533808&r2=1533809&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/build.xml (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/build.xml Sat Oct 19 19:04:34 2013
@@ -31,6 +31,9 @@
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<pathelement path="${misc.jar}"/>
+ <pathelement path="${expressions.jar}"/>
+ <pathelement path="${queries.jar}"/>
+ <fileset dir="${common.dir}/expressions/lib"/>
<path refid="base.classpath"/>
</path>
@@ -43,6 +46,6 @@
</invoke-module-javadoc>
</target>
- <target name="compile-core" depends="jar-misc, jar-analyzers-common, common.compile-core" />
+ <target name="compile-core" depends="jar-expressions, jar-misc, jar-analyzers-common, common.compile-core" />
</project>
Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java?rev=1533809&r1=1533808&r2=1533809&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java Sat Oct 19 19:04:34 2013
@@ -46,10 +46,13 @@ import org.apache.lucene.util.BytesRefIt
*/
public class DocumentDictionary implements Dictionary {
- private final IndexReader reader;
+ /** {@link IndexReader} to load documents from */
+ protected final IndexReader reader;
+
+ /** Field to read payload from */
+ protected final String payloadField;
private final String field;
private final String weightField;
- private final String payloadField;
/**
* Creates a new dictionary with the contents of the fields named <code>field</code>
@@ -80,8 +83,9 @@ public class DocumentDictionary implemen
public BytesRefIterator getWordsIterator() throws IOException {
return new DocumentInputIterator(payloadField!=null);
}
-
- final class DocumentInputIterator implements InputIterator {
+
+ /** Implements {@link InputIterator} from stored fields. */
+ protected class DocumentInputIterator implements InputIterator {
private final int docCount;
private final Set<String> relevantFields;
private final boolean hasPayloads;
@@ -89,6 +93,7 @@ public class DocumentDictionary implemen
private int currentDocId = -1;
private long currentWeight;
private BytesRef currentPayload;
+ private Document doc;
/**
* Creates an iterator over term, weight and payload fields from the lucene
@@ -100,13 +105,7 @@ public class DocumentDictionary implemen
this.hasPayloads = hasPayloads;
currentPayload = null;
liveDocs = MultiFields.getLiveDocs(reader);
- List<String> relevantFieldList;
- if(hasPayloads) {
- relevantFieldList = Arrays.asList(field, weightField, payloadField);
- } else {
- relevantFieldList = Arrays.asList(field, weightField);
- }
- this.relevantFields = new HashSet<String>(relevantFieldList);
+ this.relevantFields = getRelevantFields(new String [] {field, weightField, payloadField});
}
@Override
@@ -127,7 +126,7 @@ public class DocumentDictionary implemen
continue;
}
- Document doc = reader.document(currentDocId, relevantFields);
+ doc = reader.document(currentDocId, relevantFields);
if (hasPayloads) {
IndexableField payload = doc.getField(payloadField);
@@ -139,13 +138,7 @@ public class DocumentDictionary implemen
currentPayload = payload.binaryValue();
}
- IndexableField weight = doc.getField(weightField);
- if (weight == null) {
- throw new IllegalArgumentException(weightField + " does not exist");
- } else if (weight.numericValue() == null) {
- throw new IllegalArgumentException(weightField + " does not have numeric value");
- }
- currentWeight = weight.numericValue().longValue();
+ currentWeight = getWeight(currentDocId);
IndexableField fieldVal = doc.getField(field);
if (fieldVal == null) {
@@ -168,6 +161,26 @@ public class DocumentDictionary implemen
public boolean hasPayloads() {
return hasPayloads;
}
+
+ /** Return the suggestion weight for this document */
+ protected long getWeight(int docId) {
+ IndexableField weight = doc.getField(weightField);
+ if (weight == null) {
+ throw new IllegalArgumentException(weightField + " does not exist");
+ } else if (weight.numericValue() == null) {
+ throw new IllegalArgumentException(weightField + " does not have numeric value");
+ }
+ return weight.numericValue().longValue();
+ }
+ private Set<String> getRelevantFields(String... fields) {
+ Set<String> relevantFields = new HashSet<String>();
+ for (String relevantField : fields) {
+ if (relevantField != null) {
+ relevantFields.add(relevantField);
+ }
+ }
+ return relevantFields;
+ }
}
}
Copied: lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java (from r1533808, lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java?p2=lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java&p1=lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java&r1=1533808&r2=1533809&rev=1533809&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java Sat Oct 19 19:04:34 2013
@@ -53,7 +53,7 @@ public class DocumentExpressionDictionar
static final String PAYLOAD_FIELD_NAME = "p1";
private Map<String, Document> generateIndexDocuments(int ndocs) {
- Map<String, Document> docs = new HashMap<>();
+ Map<String, Document> docs = new HashMap<String, Document>();
for(int i = 0; i < ndocs ; i++) {
Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
@@ -147,7 +147,7 @@ public class DocumentExpressionDictionar
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
Map<String, Document> docs = generateIndexDocuments(10);
Random rand = random();
- List<String> termsToDel = new ArrayList<>();
+ List<String> termsToDel = new ArrayList<String>();
for(Document doc : docs.values()) {
if(rand.nextBoolean()) {
termsToDel.add(doc.get(FIELD_NAME));