You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by wh...@apache.org on 2006/09/28 09:10:31 UTC

svn commit: r450724 - /lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java

Author: whoschek
Date: Thu Sep 28 00:10:30 2006
New Revision: 450724

URL: http://svn.apache.org/viewvc?view=rev&rev=450724
Log:
Added support for a per-field boost factor.

Modified:
    lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java

Modified: lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?view=diff&rev=450724&r1=450723&r2=450724
==============================================================================
--- lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/java/trunk/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Thu Sep 28 00:10:30 2006
@@ -20,6 +20,7 @@
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
@@ -169,6 +170,9 @@
   /** pos: positions[3*i], startOffset: positions[3*i +1], endOffset: positions[3*i +2] */
   private final int stride;
   
+  /** Document-level boost, currently fixed at 1.0; could be made configurable. See {@link Document#setBoost(float)}. */
+  private static final float docBoost = 1.0f;
+  
   private static final long serialVersionUID = 2782195016849084649L;
 
   private static final boolean DEBUG = false;
@@ -274,6 +278,18 @@
   }
   
   /**
+   * Equivalent to <code>addField(fieldName, stream, 1.0f)</code>.
+   * 
+   * @param fieldName
+   *            a name to be associated with the text
+   * @param stream
+   *            the token stream to retrieve tokens from
+   */
+  public void addField(String fieldName, TokenStream stream) {
+	addField(fieldName, stream, 1.0f);
+  }
+
+  /**
    * Iterates over the given token stream and adds the resulting terms to the index;
    * Equivalent to adding a tokenized, indexed, termVectorStored, unstored,
    * Lucene {@link org.apache.lucene.document.Field}.
@@ -284,8 +300,11 @@
    *            a name to be associated with the text
    * @param stream
    *            the token stream to retrieve tokens from.
+   * @param boost
+   *            the boost factor for hits on this field; must be greater than 0.0
+   * @see Field#setBoost(float)
    */
-  public void addField(String fieldName, TokenStream stream) {
+  public void addField(String fieldName, TokenStream stream, float boost) {
     /*
      * Note that this method signature avoids having a user call new
      * o.a.l.d.Field(...) which would be much too expensive due to the
@@ -308,7 +327,9 @@
       if (fieldName == null)
         throw new IllegalArgumentException("fieldName must not be null");
       if (stream == null)
-        throw new IllegalArgumentException("token stream must not be null");
+          throw new IllegalArgumentException("token stream must not be null");
+      if (boost <= 0.0f)
+          throw new IllegalArgumentException("boost factor must be greater than 0.0");
       if (fields.get(fieldName) != null)
         throw new IllegalArgumentException("field must not be added more than once");
       
@@ -338,7 +359,8 @@
       
       // ensure infos.numTokens > 0 invariant; needed for correct operation of terms()
       if (numTokens > 0) {
-        fields.put(fieldName, new Info(terms, numTokens));
+        boost = boost * docBoost; // see DocumentWriter.addDocument(...)
+        fields.put(fieldName, new Info(terms, numTokens, boost));
         sortedFields = null;    // invalidate sorted view, if any
       }
     } catch (IOException e) { // can never happen
@@ -435,7 +457,7 @@
     while (iter.hasNext()) { // for each Field Info
       Map.Entry entry = (Map.Entry) iter.next();      
       Info info = (Info) entry.getValue();
-      size += HEADER + 4 + PTR + PTR + PTR; // Info instance vars
+      size += HEADER + 4 + 4 + PTR + PTR + PTR; // Info instance vars
       if (info.sortedTerms != null) size += ARR + PTR * info.sortedTerms.length;
       
       int len = info.terms.size();
@@ -545,14 +567,18 @@
     /** Number of added tokens for this field */
     private final int numTokens;
     
+    /** Boost factor for hits for this field */
+    private final float boost;
+
     /** Term for this field's fieldName, lazily computed on demand */
     public transient Term template;
 
     private static final long serialVersionUID = 2882195016849084649L;  
 
-    public Info(HashMap terms, int numTokens) {
+    public Info(HashMap terms, int numTokens, float boost) {
       this.terms = terms;
       this.numTokens = numTokens;
+      this.boost = boost;
     }
     
     /**
@@ -577,6 +603,10 @@
       return (ArrayIntList) sortedTerms[pos].getValue();
     }
     
+    public float getBoost() {
+      return boost;
+    }
+    
   }
   
   
@@ -970,6 +1000,8 @@
         Info info = getInfo(fieldName);
         int numTokens = info != null ? info.numTokens : 0;
         float n = sim.lengthNorm(fieldName, numTokens);
+        float boost = info != null ? info.getBoost() : 1.0f; 
+        n = n * boost; // see DocumentWriter.writeNorms(String segment)				
         byte norm = Similarity.encodeNorm(n);
         norms = new byte[] {norm};