You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/10/02 17:30:29 UTC
svn commit: r1528521 - in /lucene/dev/branches/branch_4x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/util/
lucene/core/src/java/org/apache/lucene/util/fst/ lucene/suggest/
lucene/suggest/src/java/org/apache/lucene/search/suggest/ana...
Author: mikemccand
Date: Wed Oct 2 15:30:29 2013
New Revision: 1528521
URL: http://svn.apache.org/r1528521
Log:
LUCENE-5214: add FreeTextSuggester
Added:
lucene/dev/branches/branch_4x/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
- copied unchanged from r1528517, lucene/dev/trunk/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java
- copied, changed from r1528517, lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/lucene/ (props changed)
lucene/dev/branches/branch_4x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/lucene/core/ (props changed)
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
lucene/dev/branches/branch_4x/lucene/suggest/ (props changed)
lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1528521&r1=1528520&r2=1528521&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Wed Oct 2 15:30:29 2013
@@ -33,6 +33,11 @@ New Features
on best effort which was not user-friendly.
(Uwe Schindler, Robert Muir)
+* LUCENE-5214: Add new FreeTextSuggester, to predict the next word
+ using a simple ngram language model. This is useful for the "long
+ tail" suggestions, when a primary suggester fails to find a
+ suggestion. (Mike McCandless)
+
Bug Fixes
* LUCENE-4998: Fixed a few places to pass IOContext.READONCE instead
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java?rev=1528521&r1=1528520&r2=1528521&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java Wed Oct 2 15:30:29 2013
@@ -383,7 +383,7 @@ public final class BytesRefHash {
return ids[findHash(bytes, code)];
}
- private final int findHash(BytesRef bytes, int code) {
+ private int findHash(BytesRef bytes, int code) {
assert bytesStart != null : "bytesStart is null - not initialized";
// final position
int hashPos = code & hashMask;
@@ -578,7 +578,7 @@ public final class BytesRefHash {
}
/** A simple {@link BytesStartArray} that tracks
- * memory allocation using a private {@link AtomicLong}
+ * memory allocation using a private {@link Counter}
* instance. */
public static class DirectBytesStartArray extends BytesStartArray {
// TODO: can't we just merge this w/
Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java?rev=1528521&r1=1528520&r2=1528521&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/fst/Util.java Wed Oct 2 15:30:29 2013
@@ -238,11 +238,16 @@ public final class Util {
}
}
- private static class FSTPath<T> {
+ /** Represents a path in TopNSearcher.
+ *
+ * @lucene.experimental
+ */
+ public static class FSTPath<T> {
public FST.Arc<T> arc;
public T cost;
public final IntsRef input;
+ /** Sole constructor */
public FSTPath(T cost, FST.Arc<T> arc, IntsRef input) {
this.arc = new FST.Arc<T>().copyFrom(arc);
this.cost = cost;
@@ -300,7 +305,7 @@ public final class Util {
}
// If back plus this arc is competitive then add to queue:
- private void addIfCompetitive(FSTPath<T> path) {
+ protected void addIfCompetitive(FSTPath<T> path) {
assert queue != null;
@@ -399,6 +404,7 @@ public final class Util {
if (queue == null) {
// Ran out of paths
+ //System.out.println(" break queue=null");
break;
}
@@ -408,6 +414,7 @@ public final class Util {
if (path == null) {
// There were less than topN paths available:
+ //System.out.println(" break no more paths");
break;
}
@@ -478,6 +485,7 @@ public final class Util {
//System.out.println(" done!: " + path);
T finalOutput = fst.outputs.add(path.cost, path.arc.output);
if (acceptResult(path.input, finalOutput)) {
+ //System.out.println(" add result: " + path);
results.add(new MinResult<T>(path.input, finalOutput));
} else {
rejectCount++;
@@ -761,11 +769,12 @@ public final class Util {
* Ensures an arc's label is indeed printable (dot uses US-ASCII).
*/
private static String printableLabel(int label) {
- if (label >= 0x20 && label <= 0x7d) {
+ // Any ordinary ASCII character, except for " or \, is
+ // printed as the character itself; anything else is printed as a hex string:
+ if (label >= 0x20 && label <= 0x7d && label != 0x22 && label != 0x5c) { // " OR \
return Character.toString((char) label);
- } else {
- return "0x" + Integer.toHexString(label);
}
+ return "0x" + Integer.toHexString(label);
}
/** Just maps each UTF16 unit (char) to the ints in an
Copied: lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java (from r1528517, lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java?p2=lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java&p1=lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java&r1=1528517&r2=1528521&rev=1528521&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java (original)
+++ lucene/dev/branches/branch_4x/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java Wed Oct 2 15:30:29 2013
@@ -146,6 +146,11 @@ public class TestFreeTextSuggester exten
}
@Override
+ public Comparator<BytesRef> getComparator() {
+ return null;
+ }
+
+ @Override
public BytesRef next() {
Document doc;
try {
@@ -324,6 +329,11 @@ public class TestFreeTextSuggester exten
int upto;
@Override
+ public Comparator<BytesRef> getComparator() {
+ return null;
+ }
+
+ @Override
public BytesRef next() {
if (upto == docs.length) {
return null;
Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java?rev=1528521&r1=1528520&r2=1528521&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (original)
+++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java Wed Oct 2 15:30:29 2013
@@ -282,7 +282,11 @@ public class _TestUtil {
}
public static String randomSimpleString(Random r, int maxLength) {
- final int end = nextInt(r, 0, maxLength);
+ return randomSimpleString(r, 0, maxLength);
+ }
+
+ public static String randomSimpleString(Random r, int minLength, int maxLength) {
+ final int end = nextInt(r, minLength, maxLength);
if (end == 0) {
// allow 0 length
return "";