You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/10/19 21:04:34 UTC

svn commit: r1533809 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/suggest/ lucene/suggest/src/java/org/apache/lucene/search/suggest/ lucene/suggest/src/test/org/apache/lucene/search/suggest/

Author: mikemccand
Date: Sat Oct 19 19:04:34 2013
New Revision: 1533809

URL: http://svn.apache.org/r1533809
Log:
LUCENE-5294: add DocumentExpressionDictionary, to compute each suggestion's weight using a javascript expression

Added:
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java
      - copied unchanged from r1533808, lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentExpressionDictionary.java
    lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java
      - copied, changed from r1533808, lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/   (props changed)
    lucene/dev/branches/branch_4x/lucene/suggest/build.xml
    lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1533809&r1=1533808&r2=1533809&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Sat Oct 19 19:04:34 2013
@@ -55,6 +55,10 @@ New Features
 
 * LUCENE-5269: Add CodepointCountFilter. (Robert Muir)
 
+* LUCENE-5294: Suggest module: add DocumentExpressionDictionary to
+  compute each suggestion's weight using a javascript expression.
+  (Areek Zillur via Mike McCandless)
+
 Bug Fixes
 
 * LUCENE-4998: Fixed a few places to pass IOContext.READONCE instead

Modified: lucene/dev/branches/branch_4x/lucene/suggest/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/build.xml?rev=1533809&r1=1533808&r2=1533809&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/build.xml (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/build.xml Sat Oct 19 19:04:34 2013
@@ -31,6 +31,9 @@
   <path id="classpath">
     <pathelement path="${analyzers-common.jar}"/>
     <pathelement path="${misc.jar}"/>
+    <pathelement path="${expressions.jar}"/>
+    <pathelement path="${queries.jar}"/>
+    <fileset dir="${common.dir}/expressions/lib"/>
     <path refid="base.classpath"/>
   </path>
 
@@ -43,6 +46,6 @@
     </invoke-module-javadoc>
   </target>
 
-  <target name="compile-core" depends="jar-misc, jar-analyzers-common, common.compile-core" />
+  <target name="compile-core" depends="jar-expressions, jar-misc, jar-analyzers-common, common.compile-core" />
 
 </project>

Modified: lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java?rev=1533809&r1=1533808&r2=1533809&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java Sat Oct 19 19:04:34 2013
@@ -46,10 +46,13 @@ import org.apache.lucene.util.BytesRefIt
  */
 public class DocumentDictionary implements Dictionary {
   
-  private final IndexReader reader;
+  /** {@link IndexReader} to load documents from */
+  protected final IndexReader reader;
+
+  /** Field to read payload from */
+  protected final String payloadField;
   private final String field;
   private final String weightField;
-  private final String payloadField;
   
   /**
    * Creates a new dictionary with the contents of the fields named <code>field</code>
@@ -80,8 +83,9 @@ public class DocumentDictionary implemen
   public BytesRefIterator getWordsIterator() throws IOException {
     return new DocumentInputIterator(payloadField!=null);
   }
-    
-  final class DocumentInputIterator implements InputIterator {
+
+  /** Implements {@link InputIterator} from stored fields. */
+  protected class DocumentInputIterator implements InputIterator {
     private final int docCount;
     private final Set<String> relevantFields;
     private final boolean hasPayloads;
@@ -89,6 +93,7 @@ public class DocumentDictionary implemen
     private int currentDocId = -1;
     private long currentWeight;
     private BytesRef currentPayload;
+    private Document doc;
     
     /**
      * Creates an iterator over term, weight and payload fields from the lucene
@@ -100,13 +105,7 @@ public class DocumentDictionary implemen
       this.hasPayloads = hasPayloads;
       currentPayload = null;
       liveDocs = MultiFields.getLiveDocs(reader);
-      List<String> relevantFieldList;
-      if(hasPayloads) {
-        relevantFieldList = Arrays.asList(field, weightField, payloadField);
-      } else {
-        relevantFieldList = Arrays.asList(field, weightField);
-      }
-      this.relevantFields = new HashSet<String>(relevantFieldList);
+      this.relevantFields = getRelevantFields(new String [] {field, weightField, payloadField});
     }
 
     @Override
@@ -127,7 +126,7 @@ public class DocumentDictionary implemen
           continue;
         }
 
-        Document doc = reader.document(currentDocId, relevantFields);
+        doc = reader.document(currentDocId, relevantFields);
         
         if (hasPayloads) {
           IndexableField payload = doc.getField(payloadField);
@@ -139,13 +138,7 @@ public class DocumentDictionary implemen
           currentPayload = payload.binaryValue();
         }
         
-        IndexableField weight = doc.getField(weightField);
-        if (weight == null) {
-          throw new IllegalArgumentException(weightField + " does not exist");
-        } else if (weight.numericValue() == null) {
-          throw new IllegalArgumentException(weightField + " does not have numeric value");
-        }
-        currentWeight = weight.numericValue().longValue();
+        currentWeight = getWeight(currentDocId);
         
         IndexableField fieldVal = doc.getField(field);
         if (fieldVal == null) {
@@ -168,6 +161,26 @@ public class DocumentDictionary implemen
     public boolean hasPayloads() {
       return hasPayloads;
     }
+
+    /** Return the suggestion weight for this document */
+    protected long getWeight(int docId) {
+      IndexableField weight = doc.getField(weightField);
+      if (weight == null) {
+        throw new IllegalArgumentException(weightField + " does not exist");
+      } else if (weight.numericValue() == null) {
+        throw new IllegalArgumentException(weightField + " does not have numeric value");
+      }
+      return weight.numericValue().longValue();
+    }
     
+    private Set<String> getRelevantFields(String... fields) {
+      Set<String> relevantFields = new HashSet<String>();
+      for (String relevantField : fields) {
+        if (relevantField != null) {
+          relevantFields.add(relevantField);
+        }
+      }
+      return relevantFields;
+    }
   }
 }

Copied: lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java (from r1533808, lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java?p2=lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java&p1=lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java&r1=1533808&r2=1533809&rev=1533809&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentExpressionDictionaryTest.java Sat Oct 19 19:04:34 2013
@@ -53,7 +53,7 @@ public class DocumentExpressionDictionar
   static final String PAYLOAD_FIELD_NAME = "p1";
   
   private Map<String, Document> generateIndexDocuments(int ndocs) {
-    Map<String, Document> docs = new HashMap<>();
+    Map<String, Document> docs = new HashMap<String, Document>();
     for(int i = 0; i < ndocs ; i++) {
       Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
       Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
@@ -147,7 +147,7 @@ public class DocumentExpressionDictionar
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
     Map<String, Document> docs = generateIndexDocuments(10);
     Random rand = random();
-    List<String> termsToDel = new ArrayList<>();
+    List<String> termsToDel = new ArrayList<String>();
     for(Document doc : docs.values()) {
       if(rand.nextBoolean()) {
         termsToDel.add(doc.get(FIELD_NAME));