You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by bu...@apache.org on 2007/11/23 22:17:35 UTC

svn commit: r597741 - in /lucene/java/trunk: ./ src/java/org/apache/lucene/analysis/ src/java/org/apache/lucene/index/ src/java/org/apache/lucene/search/ src/java/org/apache/lucene/search/payloads/ src/test/org/apache/lucene/index/ src/test/org/apache/...

Author: buschmi
Date: Fri Nov 23 13:17:32 2007
New Revision: 597741

URL: http://svn.apache.org/viewvc?rev=597741&view=rev
Log:
LUCENE-1062: Add setData(byte[] data), setData(byte[] data, int offset, int length), getData(), getOffset() and clone() methods to o.a.l.index.Payload. Also add the field name as arg to Similarity.scorePayload().

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java
    lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java
    lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java
    lucene/java/trunk/src/java/org/apache/lucene/index/TermPositions.java
    lucene/java/trunk/src/java/org/apache/lucene/search/Similarity.java
    lucene/java/trunk/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java
    lucene/java/trunk/src/java/org/apache/lucene/search/payloads/package.html
    lucene/java/trunk/src/test/org/apache/lucene/index/TestPayloads.java
    lucene/java/trunk/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=597741&r1=597740&r2=597741&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Nov 23 13:17:32 2007
@@ -62,6 +62,11 @@
     
  7. LUCENE-743: Add IndexReader.reopen() method that re-opens an
     existing IndexReader (see New features -> 9.) (Michael Busch)
+
+ 8. LUCENE-1062: Add setData(byte[] data), 
+    setData(byte[] data, int offset, int length), getData(), getOffset()
+    and clone() methods to o.a.l.index.Payload. Also add the field name 
+    as arg to Similarity.scorePayload(). (Michael Busch)
     
 Bug fixes
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java?rev=597741&r1=597740&r2=597741&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java Fri Nov 23 13:17:32 2007
@@ -73,9 +73,6 @@
 
   @see org.apache.lucene.index.Payload
 */
-
-// TODO: Remove warning after API has been finalized
-
 public class Token implements Cloneable {
 
   private static final String DEFAULT_TYPE = "word";
@@ -321,25 +318,15 @@
   }
 
   /** 
-   * Returns this Token's payload. 
-   * <p><font color="#FF0000">
-   * WARNING: The status of the <b>Payloads</b> feature is experimental. 
-   * The APIs introduced here might change in the future and will not be 
-   * supported anymore in such a case.</font>
-   */
-  // TODO: Remove warning after API has been finalized
+   * Returns this Token's payload.
+   */ 
   public Payload getPayload() {
     return this.payload;
   }
 
   /** 
    * Sets this Token's payload.
-   * <p><font color="#FF0000">
-   * WARNING: The status of the <b>Payloads</b> feature is experimental. 
-   * The APIs introduced here might change in the future and will not be 
-   * supported anymore in such a case.</font>
    */
-  // TODO: Remove warning after API has been finalized
   public void setPayload(Payload payload) {
     this.payload = payload;
   }
@@ -380,6 +367,9 @@
       if (termBuffer != null) {
         t.termBuffer = null;
         t.setTermBuffer(termBuffer, 0, termLength);
+      }
+      if (payload != null) {
+        t.setPayload((Payload) payload.clone());
       }
       return t;
     } catch (CloneNotSupportedException e) {

Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java?rev=597741&r1=597740&r2=597741&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/TokenStream.java Fri Nov 23 13:17:32 2007
@@ -46,8 +46,9 @@
 
     if (result != null) {
       Payload p = result.getPayload();
-      if (p != null)
-        result.setPayload(new Payload(p.toByteArray(), 0, p.length()));
+      if (p != null) {
+        result.setPayload((Payload) p.clone());
+      }
     }
 
     return result;
@@ -74,7 +75,7 @@
    *  implement this method. Reset() is not needed for
    *  the standard indexing process. However, if the Tokens 
    *  of a TokenStream are intended to be consumed more than 
-   *  once, it is neccessary to implement reset(). 
+   *  once, it is necessary to implement reset(). 
    */
   public void reset() throws IOException {}
   

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java?rev=597741&r1=597740&r2=597741&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/Payload.java Fri Nov 23 13:17:32 2007
@@ -32,15 +32,9 @@
   *  <p>
   *  Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)}
   *  to retrieve the payloads from the index.<br>
-  *  <br>
-  *  
-  * <p><font color="#FF0000">
-  * WARNING: The status of the <b>Payloads</b> feature is experimental. 
-  * The APIs introduced here might change in the future and will not be 
-  * supported anymore in such a case.</font>
-  */    
-  // TODO: Remove warning after API has been finalized
-  public class Payload implements Serializable {
+  *
+  */
+  public class Payload implements Serializable, Cloneable {
     /** the byte array containing the payload data */
     protected byte[] data;
     
@@ -51,12 +45,14 @@
     protected int length;
     
     /** Creates an empty payload and does not allocate a byte array. */
-    protected Payload() {
-      // no-arg constructor since this class implements Serializable
+    public Payload() {
+      // nothing to do
     }
     
     /**
      * Creates a new payload with the the given array as data.
+     * A reference to the passed-in array is held, i. e. no 
+     * copy is made.
      * 
      * @param data the data of this payload
      */
@@ -66,6 +62,8 @@
 
     /**
      * Creates a new payload with the the given array as data. 
+     * A reference to the passed-in array is held, i. e. no 
+     * copy is made.
      * 
      * @param data the data of this payload
      * @param offset the offset in the data byte array
@@ -81,6 +79,41 @@
     }
     
     /**
+     * Sets this payload's data. 
+     * A reference to the passed-in array is held, i. e. no 
+     * copy is made.
+     */
+    public void setData(byte[] data) {
+      setData(data, 0, data.length);
+    }
+
+    /**
+     * Sets this payload's data. 
+     * A reference to the passed-in array is held, i. e. no 
+     * copy is made.
+     */
+    public void setData(byte[] data, int offset, int length) {
+      this.data = data;
+      this.offset = offset;
+      this.length = length;
+    }
+    
+    /**
+     * Returns a reference to the underlying byte array
+     * that holds this payload's data.
+     */
+    public byte[] getData() {
+      return this.data;
+    }
+    
+    /**
+     * Returns the offset in the underlying byte array 
+     */
+    public int getOffset() {
+      return this.offset;
+    }
+    
+    /**
      * Returns the length of the payload data. 
      */
     public int length() {
@@ -117,5 +150,14 @@
         throw new ArrayIndexOutOfBoundsException();
       }
       System.arraycopy(this.data, this.offset, target, targetOffset, this.length);
+    }
+
+    /**
+     * Clones this payload by creating a copy of the underlying
+     * byte array.
+     */
+    public Object clone() {
+      Payload clone = new Payload(this.toByteArray());
+      return clone;
     }
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermPositions.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermPositions.java?rev=597741&r1=597740&r2=597741&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermPositions.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermPositions.java Fri Nov 23 13:17:32 2007
@@ -43,14 +43,8 @@
      * Returns the length of the payload at the current term position.
      * This is invalid until {@link #nextPosition()} is called for
      * the first time.<br>
-     * <br>
-     * <p><font color="#FF0000">
-     * WARNING: The status of the <b>Payloads</b> feature is experimental. 
-     * The APIs introduced here might change in the future and will not be 
-     * supported anymore in such a case.</font>
      * @return length of the current payload in number of bytes
      */
-    // TODO: Remove warning after API has been finalized
     int getPayloadLength();
     
     /** 
@@ -61,11 +55,6 @@
      * of {@link #nextPosition()}. However, payloads are loaded lazily,
      * so if the payload data for the current position is not needed,
      * this method may not be called at all for performance reasons.<br>
-     * <br>
-     * <p><font color="#FF0000">
-     * WARNING: The status of the <b>Payloads</b> feature is experimental. 
-     * The APIs introduced here might change in the future and will not be 
-     * supported anymore in such a case.</font>
      * 
      * @param data the array into which the data of this payload is to be
      *             stored, if it is big enough; otherwise, a new byte[] array
@@ -75,7 +64,6 @@
      * @return a byte[] array containing the data of this payload
      * @throws IOException
      */
-    // TODO: Remove warning after API has been finalized
     byte[] getPayload(byte[] data, int offset) throws IOException;
 
   /**
@@ -84,14 +72,8 @@
    * Payloads can only be loaded once per call to 
    * {@link #nextPosition()}.
    * 
-   * <p><font color="#FF0000">
-   * WARNING: The status of the <b>Payloads</b> feature is experimental. 
-   * The APIs introduced here might change in the future and will not be 
-   * supported anymore in such a case.</font>
-   * 
    * @return true if there is a payload available at this position that can be loaded
    */
-   // TODO: Remove warning after API has been finalized
     public boolean isPayloadAvailable();
 
 }

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/Similarity.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/Similarity.java?rev=597741&r1=597740&r2=597741&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/Similarity.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/Similarity.java Fri Nov 23 13:17:32 2007
@@ -513,19 +513,14 @@
    * what is in the byte array.
    * <p>
    * The default implementation returns 1.
-   * <br>
-   * <p><font color="#FF0000">
-   * WARNING: The status of the <b>Payloads</b> feature is experimental. 
-   * The APIs introduced here might change in the future and will not be 
-   * supported anymore in such a case.</font>
    *
+   * @param fieldName The fieldName of the term this payload belongs to
    * @param payload The payload byte array to be scored
    * @param offset The offset into the payload array
    * @param length The length in the array
    * @return An implementation dependent float to be used as a scoring factor 
    */
-  // TODO: Remove warning after API has been finalized
-  public float scorePayload(byte [] payload, int offset, int length)
+  public float scorePayload(String fieldName, byte [] payload, int offset, int length)
   {
     //Do nothing
     return 1;

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java?rev=597741&r1=597740&r2=597741&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/payloads/BoostingTermQuery.java Fri Nov 23 13:17:32 2007
@@ -37,11 +37,6 @@
  * <p>
  * Payload scores are averaged across term occurrences in the document.  
  * 
- * <p><font color="#FF0000">
- * WARNING: The status of the <b>Payloads</b> feature is experimental. 
- * The APIs introduced here might change in the future and will not be 
- * supported anymore in such a case.</font>
- *
  * @see org.apache.lucene.search.Similarity#scorePayload(byte[], int, int)
  */
 public class BoostingTermQuery extends SpanTermQuery{
@@ -136,7 +131,7 @@
       protected void processPayload(Similarity similarity) throws IOException {
         if (positions.isPayloadAvailable()) {
           payload = positions.getPayload(payload, 0);
-          payloadScore += similarity.scorePayload(payload, 0, positions.getPayloadLength());
+          payloadScore += similarity.scorePayload(term.field(), payload, 0, positions.getPayloadLength());
           payloadsSeen++;
 
         } else {

Modified: lucene/java/trunk/src/java/org/apache/lucene/search/payloads/package.html
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/payloads/package.html?rev=597741&r1=597740&r2=597741&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/payloads/package.html (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/payloads/package.html Fri Nov 23 13:17:32 2007
@@ -29,13 +29,6 @@
     <li><a href="./BoostingTermQuery.html">BoostingTermQuery</a> -- Boost a term's score based on the value of the payload located at that term</li>
   </ol>
 </div>
-<DIV>
-<font color="#FF0000">
-WARNING: The status of the <b>Payloads</b> feature is experimental. The APIs
-introduced here might change in the future and will not be supported anymore
-in such a case.
-</font>
-</DIV>
 <DIV>&nbsp;</DIV>
 <DIV align="center">
 </DIV>

Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestPayloads.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestPayloads.java?rev=597741&r1=597740&r2=597741&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestPayloads.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestPayloads.java Fri Nov 23 13:17:32 2007
@@ -81,6 +81,13 @@
         } catch (Exception expected) {
             // expected exception
         }
+        
+        Payload clone = (Payload) payload.clone();
+        assertEquals(payload.length(), clone.length());
+        for (int i = 0; i < payload.length(); i++) {
+          assertEquals(payload.byteAt(i), clone.byteAt(i));
+        }
+        
     }
 
     // Tests whether the DocumentWriter and SegmentMerger correctly enable the
@@ -429,6 +436,7 @@
         private byte[] data;
         private int length;
         private int offset;
+        Payload payload = new Payload();
         
         public PayloadFilter(TokenStream in, byte[] data, int offset, int length) {
             super(in);
@@ -437,14 +445,23 @@
             this.offset = offset;
         }
         
-        public Token next() throws IOException {
-            Token nextToken = input.next();
-            if (nextToken != null && offset + length <= data.length) {
-              nextToken.setPayload(new Payload(data, offset, length));
-              offset += length;
-            }            
+        public Token next(Token token) throws IOException {
+            token = input.next(token);
+            if (token != null) {
+                if (offset + length <= data.length) {
+                    Payload p = null;
+                    if (p == null) {
+                        p = new Payload();
+                        token.setPayload(p);
+                    }
+                    p.setData(data, offset, length);
+                    offset += length;                
+                } else {
+                    token.setPayload(null);
+                }
+            }
             
-            return nextToken;
+            return token;
         }
     }
     

Modified: lucene/java/trunk/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java?rev=597741&r1=597740&r2=597741&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/payloads/TestBoostingTermQuery.java Fri Nov 23 13:17:32 2007
@@ -196,7 +196,7 @@
   static class BoostingSimilarity extends DefaultSimilarity {
 
     // TODO: Remove warning after API has been finalized
-    public float scorePayload(byte[] payload, int offset, int length) {
+    public float scorePayload(String fieldName, byte[] payload, int offset, int length) {
       //we know it is size 4 here, so ignore the offset/length
       return payload[0];
     }