You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/11/15 11:36:24 UTC
svn commit: r1202119 - in /incubator/opennlp/sandbox/opennlp-similarity: ./
src/main/java/opennlp/tools/similarity/apps/utils/
src/main/java/opennlp/tools/textsimilarity/
src/test/java/opennlp/tools/textsimilarity/
Author: joern
Date: Tue Nov 15 10:36:24 2011
New Revision: 1202119
URL: http://svn.apache.org/viewvc?rev=1202119&view=rev
Log:
OPENNLP-337: Moved the Porter Stemmer to the opennlp.tools.stemmer package. Thanks to Boris Galitsky for providing the patch.
Removed:
incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PorterStemmer.java
incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/PorterStemmer.java
Modified:
incubator/opennlp/sandbox/opennlp-similarity/pom.xml
incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java
incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java
incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java
incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java
Modified: incubator/opennlp/sandbox/opennlp-similarity/pom.xml
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/pom.xml?rev=1202119&r1=1202118&r2=1202119&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/pom.xml (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/pom.xml Tue Nov 15 10:36:24 2011
@@ -44,7 +44,7 @@
<dependency>
<groupId>org.apache.opennlp</groupId>
<artifactId>opennlp-tools</artifactId>
- <version>1.5.2-incubating-SNAPSHOT</version>
+ <version>1.5.3-incubating-SNAPSHOT</version>
</dependency>
<dependency>
Modified: incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java?rev=1202119&r1=1202118&r2=1202119&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java Tue Nov 15 10:36:24 2011
@@ -20,6 +20,8 @@ package opennlp.tools.similarity.apps.ut
import java.util.ArrayList;
import java.util.List;
+import opennlp.tools.stemmer.PorterStemmer;
+
public class StringDistanceMeasurer {
// external tools
private PorterStemmer ps; // stemmer
@@ -56,7 +58,7 @@ public class StringDistanceMeasurer {
// string like preposition is uninteresting
continue;
try {
- w = ps.stem(w.toLowerCase());
+ w = ps.stem(w.toLowerCase()).toString();
} catch (Exception e) {
// do nothing, just have original term
}
Modified: incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java?rev=1202119&r1=1202118&r2=1202119&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java Tue Nov 15 10:36:24 2011
@@ -19,6 +19,8 @@ package opennlp.tools.textsimilarity;
import java.util.List;
+import opennlp.tools.stemmer.PorterStemmer;
+
public class LemmaFormManager {
public String matchLemmas(PorterStemmer ps, String lemma1, String lemma2,
@@ -67,7 +69,7 @@ public class LemmaFormManager {
}
try {
if (ps != null) {
- if (ps.stem(lemma1).equalsIgnoreCase(ps.stem(lemma2))) {
+ if (ps.stem(lemma1).toString().equalsIgnoreCase(ps.stem(lemma2).toString())) {
return lemma1;
}
}
Modified: incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java?rev=1202119&r1=1202118&r2=1202119&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java Tue Nov 15 10:36:24 2011
@@ -19,6 +19,7 @@ package opennlp.tools.textsimilarity;
import java.util.ArrayList;
import java.util.List;
+import opennlp.tools.stemmer.PorterStemmer;
public class ParseTreeMatcherDeterministic {
@@ -41,7 +42,7 @@ public class ParseTreeMatcherDeterminist
PorterStemmer ps = new PorterStemmer();
for (String word : lem1) {
try {
- lem1stem.add(ps.stem(word.toLowerCase()));
+ lem1stem.add(ps.stem(word.toLowerCase()).toString());
} catch (Exception e) {
// e.printStackTrace();
@@ -51,7 +52,7 @@ public class ParseTreeMatcherDeterminist
}
try {
for (String word : lem2) {
- lem2stem.add(ps.stem(word.toLowerCase()));
+ lem2stem.add(ps.stem(word.toLowerCase()).toString());
}
} catch (Exception e) {
System.err.println("problem processing word " + lem2.toString());
Modified: incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java?rev=1202119&r1=1202118&r2=1202119&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java Tue Nov 15 10:36:24 2011
@@ -31,7 +31,7 @@ import java.util.Map;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-
+import opennlp.tools.stemmer.PorterStemmer;
import opennlp.tools.similarity.apps.utils.Pair;
import org.apache.commons.lang.StringUtils;
@@ -488,7 +488,7 @@ public class TextProcessor {
}
}
- return new PorterStemmer().stem(token);
+ return new PorterStemmer().stem(token).toString();
}
public static String cleanToken(String token) {
@@ -535,7 +535,7 @@ public class TextProcessor {
term = stripToken(term);
PorterStemmer st = new PorterStemmer();
- return st.stem(term);
+ return st.stem(term).toString();
}
public static String generateFingerPrint(String s) {
Modified: incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java?rev=1202119&r1=1202118&r2=1202119&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java Tue Nov 15 10:36:24 2011
@@ -19,20 +19,23 @@ package opennlp.tools.textsimilarity;
import java.util.List;
+import opennlp.tools.parser.Parse;
+
import org.junit.Test;
import org.junit.runner.RunWith;
+import junit.framework.TestCase;
-public class ParseTreeChunkListScorerTest {
- private ParseTreeChunkListScorer parseTreeChunkListScorer;
- private ParseTreeChunk parseTreeChunk;
+public class ParseTreeChunkListScorerTest extends TestCase{
+ private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
+ private ParseTreeChunk parseTreeChunk = new ParseTreeChunk();
- @Test
public void test() {
List<List<ParseTreeChunk>> chs = parseTreeChunk
.obtainParseTreeChunkListByParsingList("[[ [NN-* IN-in NP-israel ], [NP-* IN-in NP-israel ], [NP-* IN-* TO-* NN-* ], [NN-visa IN-* NN-* IN-in ]],"
+ " [ [VB-get NN-visa IN-* NN-* IN-in .-* ], [VBD-* IN-* NN-* NN-* .-* ], [VB-* NP-* ]]]");
double sc = parseTreeChunkListScorer.getParseTreeChunkListScore(chs);
-
+ assertTrue(sc>1.90);
+
}
}