You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/11/15 11:36:24 UTC

svn commit: r1202119 - in /incubator/opennlp/sandbox/opennlp-similarity: ./ src/main/java/opennlp/tools/similarity/apps/utils/ src/main/java/opennlp/tools/textsimilarity/ src/test/java/opennlp/tools/textsimilarity/

Author: joern
Date: Tue Nov 15 10:36:24 2011
New Revision: 1202119

URL: http://svn.apache.org/viewvc?rev=1202119&view=rev
Log:
OPENNLP-337 Moved the Porter Stemmer to the opennlp.tools.stemmer package. Thanks to Boris Galitsky for providing the patch.

Removed:
    incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PorterStemmer.java
    incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/PorterStemmer.java
Modified:
    incubator/opennlp/sandbox/opennlp-similarity/pom.xml
    incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java
    incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java
    incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
    incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java
    incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java

Modified: incubator/opennlp/sandbox/opennlp-similarity/pom.xml
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/pom.xml?rev=1202119&r1=1202118&r2=1202119&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/pom.xml (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/pom.xml Tue Nov 15 10:36:24 2011
@@ -44,7 +44,7 @@
 		<dependency>
 		  <groupId>org.apache.opennlp</groupId>
 		  <artifactId>opennlp-tools</artifactId>
-		  <version>1.5.2-incubating-SNAPSHOT</version>
+		  <version>1.5.3-incubating-SNAPSHOT</version>
 		</dependency>
 		
 		<dependency>

Modified: incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java?rev=1202119&r1=1202118&r2=1202119&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/StringDistanceMeasurer.java Tue Nov 15 10:36:24 2011
@@ -20,6 +20,8 @@ package opennlp.tools.similarity.apps.ut
 import java.util.ArrayList;
 import java.util.List;
 
+import opennlp.tools.stemmer.PorterStemmer;
+
 public class StringDistanceMeasurer {
   // external tools
   private PorterStemmer ps; // stemmer
@@ -56,7 +58,7 @@ public class StringDistanceMeasurer {
         // string like preposition is uninteresting
         continue;
       try {
-        w = ps.stem(w.toLowerCase());
+        w = ps.stem(w.toLowerCase()).toString();
       } catch (Exception e) {
         // do nothing, just have original term
       }

Modified: incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java?rev=1202119&r1=1202118&r2=1202119&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/LemmaFormManager.java Tue Nov 15 10:36:24 2011
@@ -19,6 +19,8 @@ package opennlp.tools.textsimilarity;
 
 import java.util.List;
 
+import opennlp.tools.stemmer.PorterStemmer;
+
 public class LemmaFormManager {
 
   public String matchLemmas(PorterStemmer ps, String lemma1, String lemma2,
@@ -67,7 +69,7 @@ public class LemmaFormManager {
     }
     try {
       if (ps != null) {
-        if (ps.stem(lemma1).equalsIgnoreCase(ps.stem(lemma2))) {
+        if (ps.stem(lemma1).toString().equalsIgnoreCase(ps.stem(lemma2).toString())) {
           return lemma1;
         }
       }

Modified: incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java?rev=1202119&r1=1202118&r2=1202119&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/ParseTreeMatcherDeterministic.java Tue Nov 15 10:36:24 2011
@@ -19,6 +19,7 @@ package opennlp.tools.textsimilarity;
 
 import java.util.ArrayList;
 import java.util.List;
+import opennlp.tools.stemmer.PorterStemmer;
 
 public class ParseTreeMatcherDeterministic {
 
@@ -41,7 +42,7 @@ public class ParseTreeMatcherDeterminist
     PorterStemmer ps = new PorterStemmer();
     for (String word : lem1) {
       try {
-        lem1stem.add(ps.stem(word.toLowerCase()));
+        lem1stem.add(ps.stem(word.toLowerCase()).toString());
       } catch (Exception e) {
         // e.printStackTrace();
 
@@ -51,7 +52,7 @@ public class ParseTreeMatcherDeterminist
     }
     try {
       for (String word : lem2) {
-        lem2stem.add(ps.stem(word.toLowerCase()));
+        lem2stem.add(ps.stem(word.toLowerCase()).toString());
       }
     } catch (Exception e) {
       System.err.println("problem processing word " + lem2.toString());

Modified: incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java?rev=1202119&r1=1202118&r2=1202119&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/TextProcessor.java Tue Nov 15 10:36:24 2011
@@ -31,7 +31,7 @@ import java.util.Map;
 import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-
+import opennlp.tools.stemmer.PorterStemmer;
 import opennlp.tools.similarity.apps.utils.Pair;
 
 import org.apache.commons.lang.StringUtils;
@@ -488,7 +488,7 @@ public class TextProcessor {
       }
     }
 
-    return new PorterStemmer().stem(token);
+    return new PorterStemmer().stem(token).toString();
   }
 
   public static String cleanToken(String token) {
@@ -535,7 +535,7 @@ public class TextProcessor {
     term = stripToken(term);
     PorterStemmer st = new PorterStemmer();
     
-    return st.stem(term);
+    return st.stem(term).toString();
   }
 
   public static String generateFingerPrint(String s) {

Modified: incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java?rev=1202119&r1=1202118&r2=1202119&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/ParseTreeChunkListScorerTest.java Tue Nov 15 10:36:24 2011
@@ -19,20 +19,23 @@ package opennlp.tools.textsimilarity;
 
 import java.util.List;
 
+import opennlp.tools.parser.Parse;
+
 import org.junit.Test;
 import org.junit.runner.RunWith;
+import junit.framework.TestCase;
 
-public class ParseTreeChunkListScorerTest {
-  private ParseTreeChunkListScorer parseTreeChunkListScorer;
-  private ParseTreeChunk parseTreeChunk;
+public class ParseTreeChunkListScorerTest extends TestCase{
+  private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
+  private ParseTreeChunk parseTreeChunk = new ParseTreeChunk();
 
-  @Test
   public void test() {
     List<List<ParseTreeChunk>> chs = parseTreeChunk
         .obtainParseTreeChunkListByParsingList("[[ [NN-* IN-in NP-israel ],  [NP-* IN-in NP-israel ],  [NP-* IN-* TO-* NN-* ],  [NN-visa IN-* NN-* IN-in ]],"
             + " [ [VB-get NN-visa IN-* NN-* IN-in .-* ],  [VBD-* IN-* NN-* NN-* .-* ],  [VB-* NP-* ]]]");
 
     double sc = parseTreeChunkListScorer.getParseTreeChunkListScore(chs);
-
+    assertTrue(sc>1.90);
+    
   }
 }