You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2012/03/07 12:56:14 UTC

svn commit: r1297946 [3/3] - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/contrib/ lucene/contrib/spellchecker/ lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/ lucene/contrib/spellchecker/src/java/org/apache/lucene/search/s...

Modified: lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java?rev=1297946&r1=1297945&r2=1297946&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/spellchecker/src/test/org/apache/lucene/search/suggest/fst/WFSTCompletionTest.java Wed Mar  7 11:56:12 2012
@@ -45,33 +45,33 @@ public class WFSTCompletionTest extends 
     suggester.build(new TermFreqArrayIterator(keys));
     
     // top N of 2, but only foo is available
-    List<LookupResult> results = suggester.lookup("f", false, 2);
+    List<LookupResult> results = suggester.lookup(_TestUtil.stringToCharSequence("f", random), false, 2);
     assertEquals(1, results.size());
-    assertEquals("foo", results.get(0).key);
+    assertEquals("foo", results.get(0).key.toString());
     assertEquals(50, results.get(0).value, 0.01F);
     
     // top N of 1 for 'bar': we return this even though barbar is higher
-    results = suggester.lookup("bar", false, 1);
+    results = suggester.lookup(_TestUtil.stringToCharSequence("bar", random), false, 1);
     assertEquals(1, results.size());
-    assertEquals("bar", results.get(0).key);
+    assertEquals("bar", results.get(0).key.toString());
     assertEquals(10, results.get(0).value, 0.01F);
     
     // top N Of 2 for 'b'
-    results = suggester.lookup("b", false, 2);
+    results = suggester.lookup(_TestUtil.stringToCharSequence("b", random), false, 2);
     assertEquals(2, results.size());
-    assertEquals("barbar", results.get(0).key);
+    assertEquals("barbar", results.get(0).key.toString());
     assertEquals(12, results.get(0).value, 0.01F);
-    assertEquals("bar", results.get(1).key);
+    assertEquals("bar", results.get(1).key.toString());
     assertEquals(10, results.get(1).value, 0.01F);
     
     // top N of 3 for 'ba'
-    results = suggester.lookup("ba", false, 3);
+    results = suggester.lookup(_TestUtil.stringToCharSequence("ba", random), false, 3);
     assertEquals(3, results.size());
-    assertEquals("barbar", results.get(0).key);
+    assertEquals("barbar", results.get(0).key.toString());
     assertEquals(12, results.get(0).value, 0.01F);
-    assertEquals("bar", results.get(1).key);
+    assertEquals("bar", results.get(1).key.toString());
     assertEquals(10, results.get(1).value, 0.01F);
-    assertEquals("barbara", results.get(2).key);
+    assertEquals("barbara", results.get(2).key.toString());
     assertEquals(6, results.get(2).value, 0.01F);
   }
   
@@ -100,7 +100,7 @@ public class WFSTCompletionTest extends 
       // we can probably do Integer.MAX_VALUE here, but why worry.
       int weight = random.nextInt(1<<24);
       slowCompletor.put(s, (long)weight);
-      keys[i] = new TermFreq(s, (float) weight);
+      keys[i] = new TermFreq(s, weight);
     }
 
     WFSTCompletionLookup suggester = new WFSTCompletionLookup(false);
@@ -109,7 +109,7 @@ public class WFSTCompletionTest extends 
     for (String prefix : allPrefixes) {
     
       final int topN = _TestUtil.nextInt(random, 1, 10);
-      List<LookupResult> r = suggester.lookup(prefix, false, topN);
+      List<LookupResult> r = suggester.lookup(_TestUtil.stringToCharSequence(prefix, random), false, topN);
 
       // 2. go thru whole treemap (slowCompletor) and check its actually the best suggestion
       final List<LookupResult> matches = new ArrayList<LookupResult>();
@@ -117,7 +117,7 @@ public class WFSTCompletionTest extends 
       // TODO: could be faster... but its slowCompletor for a reason
       for (Map.Entry<String,Long> e : slowCompletor.entrySet()) {
         if (e.getKey().startsWith(prefix)) {
-          matches.add(new LookupResult(e.getKey(), (float)e.getValue().longValue()));
+          matches.add(new LookupResult(e.getKey(), e.getValue().longValue()));
         }
       }
 
@@ -126,7 +126,7 @@ public class WFSTCompletionTest extends 
         public int compare(LookupResult left, LookupResult right) {
           int cmp = Float.compare(right.value, left.value);
           if (cmp == 0) {
-            return left.key.compareTo(right.key);
+            return left.compareTo(right);
           } else {
             return cmp;
           }
@@ -140,7 +140,7 @@ public class WFSTCompletionTest extends 
 
       for(int hit=0;hit<r.size();hit++) {
         //System.out.println("  check hit " + hit);
-        assertEquals(matches.get(hit).key, r.get(hit).key);
+        assertEquals(matches.get(hit).key.toString(), r.get(hit).key.toString());
         assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
       }
     }

Modified: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java?rev=1297946&r1=1297945&r2=1297946&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java Wed Mar  7 11:56:12 2012
@@ -270,6 +270,53 @@ public final class ByteBlockPool {
   }
   
   /**
+   * Copies a slice of this pool into the given {@link BytesRef}.
+   * <p>
+   * On entry, {@code bytes.offset} is read as the absolute start position
+   * within the pool and {@code bytes.length} as the number of bytes to copy.
+   * On return the same instance holds the copied bytes in its own array,
+   * with {@code offset} reset to 0 and {@code length} unchanged. The slice
+   * may span any number of consecutive pool blocks.
+   *
+   * @param bytes describes the slice on entry and receives the copied bytes
+   * @return the given {@code bytes} instance
+   */
+  public final BytesRef copyFrom(final BytesRef bytes) {
+    final int length = bytes.length;
+    final int offset = bytes.offset;
+    bytes.offset = 0;
+    // presumably makes bytes.bytes large enough for length bytes - confirm against BytesRef.grow
+    bytes.grow(length);
+    int bufferIndex = offset >> BYTE_BLOCK_SHIFT;
+    byte[] buffer = buffers[bufferIndex];
+    int pos = offset & BYTE_BLOCK_MASK;
+    int overflow = (pos + length) - BYTE_BLOCK_SIZE;
+    // while copying, bytes.offset is the write position in the target and
+    // bytes.length the number of bytes still missing; both are reset at the end
+    do {
+      if (overflow <= 0) {
+        // the remainder ends inside the current block
+        System.arraycopy(buffer, pos, bytes.bytes, bytes.offset, bytes.length);
+        bytes.length = length;
+        bytes.offset = 0;
+        break;
+      } else {
+        // copy up to the end of the current block. This has to be derived from
+        // pos: "length - overflow" only equals it for the first block visited
+        // and is too large once the slice spans more than two blocks.
+        final int bytesToCopy = BYTE_BLOCK_SIZE - pos;
+        System.arraycopy(buffer, pos, bytes.bytes, bytes.offset, bytesToCopy);
+        pos = 0;
+        bytes.length -= bytesToCopy;
+        bytes.offset += bytesToCopy;
+        buffer = buffers[++bufferIndex];
+        overflow = overflow - BYTE_BLOCK_SIZE;
+      }
+    } while (true);
+    return bytes;
+  }
+  
+  /**
    * Writes the pools content to the given {@link DataOutput}
    */
   public final void writePool(final DataOutput out) throws IOException {

Modified: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/BytesRef.java?rev=1297946&r1=1297945&r2=1297946&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/BytesRef.java Wed Mar  7 11:56:12 2012
@@ -271,13 +271,7 @@ public final class BytesRef implements C
       final byte[] bBytes = b.bytes;
       int bUpto = b.offset;
       
-      final int aStop;
-      if (a.length < b.length) {
-        aStop = aUpto + a.length;
-      } else {
-        aStop = aUpto + b.length;
-      }
-
+      final int aStop = aUpto + Math.min(a.length, b.length);
       while(aUpto < aStop) {
         int aByte = aBytes[aUpto++] & 0xff;
         int bByte = bBytes[bUpto++] & 0xff;

Copied: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java (from r1291418, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java?p2=lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java&p1=lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java&r1=1291418&r2=1297946&rev=1297946&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/util/BytesRefIterator.java Wed Mar  7 11:56:12 2012
@@ -18,6 +18,7 @@ package org.apache.lucene.util;
  */
 
 import java.io.IOException;
+import java.util.Comparator;
 
 /**
  * A simple iterator interface for {@link BytesRef} iteration
@@ -40,13 +41,25 @@ public interface BytesRefIterator {
    */
   public BytesRef next() throws IOException;
   
+  /**
+   * Return the {@link BytesRef} Comparator used to sort terms provided by the
+   * iterator. This may return null if there are no items or the iterator is not
+   * sorted. Callers may invoke this method many times, so it's best to cache a
+   * single instance and reuse it.
+   */
+  public Comparator<BytesRef> getComparator();
+  
   public final static class EmptyBytesRefIterator implements BytesRefIterator {
 
-    @Override
+    //@Override - not until Java 6
     public BytesRef next() throws IOException {
       return null;
     }
     
+    public Comparator<BytesRef> getComparator() {
+      return null;
+    }
+    
   }
   
 }

Modified: lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java?rev=1297946&r1=1297945&r2=1297946&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/branches/branch_3x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java Wed Mar  7 11:56:12 2012
@@ -26,6 +26,7 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.PrintStream;
 import java.lang.reflect.Method;
+import java.nio.CharBuffer;
 import java.util.Enumeration;
 import java.util.HashMap;
 import java.util.Map;
@@ -680,4 +681,33 @@ public class _TestUtil {
 
     return doc2;
   }
+  
+  /**
+   * Wraps the string in a new {@link BytesRef} and hands it to
+   * {@link #bytesToCharSequence(BytesRef, Random)}.
+   */
+  public static CharSequence stringToCharSequence(String string, Random random) {
+    final BytesRef utf8 = new BytesRef(string);
+    return bytesToCharSequence(utf8, random);
+  }
+  
+  /**
+   * Decodes the UTF-8 bytes in {@code ref} and returns them as a randomly
+   * chosen {@link CharSequence} implementation: a {@link CharsRef} or a
+   * {@link CharBuffer} for one out of five draws each, a {@link String}
+   * otherwise.
+   */
+  public static CharSequence bytesToCharSequence(BytesRef ref, Random random) {
+    final int choice = random.nextInt(5);
+    if (choice == 4) {
+      final CharsRef utf16 = new CharsRef(ref.length);
+      UnicodeUtil.UTF8toUTF16(ref.bytes, ref.offset, ref.length, utf16);
+      return utf16;
+    }
+    if (choice == 3) {
+      return CharBuffer.wrap(ref.utf8ToString());
+    }
+    return ref.utf8ToString();
+  }
+ 
 }

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java?rev=1297946&r1=1297945&r2=1297946&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/spelling/suggest/Suggester.java Wed Mar  7 11:56:12 2012
@@ -32,6 +32,7 @@ import org.apache.lucene.search.spell.Hi
 import org.apache.lucene.search.suggest.FileDictionary;
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.Lookup.LookupResult;
+import org.apache.lucene.util.CharsRef;
 
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
@@ -132,7 +133,15 @@ public class Suggester extends SolrSpell
     try {
       lookup.build(dictionary);
       if (storeDir != null) {
-        lookup.store(storeDir);
+        if(!lookup.store(storeDir)) {
+          if (sourceLocation == null) {
+            assert reader != null && field != null;
+            LOG.error("Store Lookup build from index on field: " + field + " failed reader has: " + reader.maxDoc() + " docs");
+          } else {
+            LOG.error("Store Lookup build from sourceloaction: " + sourceLocation + " failed");
+
+          }
+        }
       }
     } catch (Exception e) {
       LOG.error("Error while building or storing Suggester data", e);
@@ -153,11 +162,6 @@ public class Suggester extends SolrSpell
     build(core, searcher);
   }
 
-  public void add(String query, int numHits) {
-    LOG.info("add " + query + ", " + numHits);
-    lookup.add(query, new Integer(numHits));
-  }
-  
   static SpellingResult EMPTY_RESULT = new SpellingResult();
 
   @Override
@@ -173,9 +177,12 @@ public class Suggester extends SolrSpell
       return EMPTY_RESULT;
     }
     SpellingResult res = new SpellingResult();
+    CharsRef scratch = new CharsRef();
     for (Token t : options.tokens) {
-      String term = new String(t.buffer(), 0, t.length());
-      List<LookupResult> suggestions = lookup.lookup(term,
+      scratch.chars = t.buffer();
+      scratch.offset = 0;
+      scratch.length = t.length();
+      List<LookupResult> suggestions = lookup.lookup(scratch,
           options.onlyMorePopular, options.count);
       if (suggestions == null) {
         continue;
@@ -184,7 +191,7 @@ public class Suggester extends SolrSpell
         Collections.sort(suggestions);
       }
       for (LookupResult lr : suggestions) {
-        res.add(t, lr.key, ((Number)lr.value).intValue());
+        res.add(t, lr.key.toString(), (int)lr.value);
       }
     }
     return res;