You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by bg...@apache.org on 2012/01/18 21:50:05 UTC

svn commit: r1233060 - in /incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools: similarity/apps/ textsimilarity/ textsimilarity/chunker2matcher/

Author: bgalitsky
Date: Wed Jan 18 20:50:04 2012
New Revision: 1233060

URL: http://svn.apache.org/viewvc?rev=1233060&view=rev
Log:
OPENNLP-414.txt

One more unique application of the Similarity component: semantic filtering of speech recognition results.

Added:
    incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessorTest.java
Modified:
    incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SearchResultsProcessorTest.java
    incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/SyntMatcherTest.java
    incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessorTest.java
    incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNodeTest.java

Modified: incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SearchResultsProcessorTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SearchResultsProcessorTest.java?rev=1233060&r1=1233059&r2=1233060&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SearchResultsProcessorTest.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SearchResultsProcessorTest.java Wed Jan 18 20:50:04 2012
@@ -1,55 +1,38 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 package opennlp.tools.similarity.apps;
 
 import java.util.List;
 
 import junit.framework.TestCase;
 
-public class SearchResultsProcessorTest extends TestCase {
-  SearchResultsProcessor proc = new SearchResultsProcessor();
-
-  public void testSearchOrder() {
-    List<HitBase> res = proc.runSearch("How can I pay tax on my income abroad");
-
-    // we verify that top answers have high similarity score
-    System.out.println(res);
-    HitBase first = res.get(0);
-    assertTrue(first.getGenerWithQueryScore() > 3.0);
-    // assertTrue(first.getTitle().indexOf("Foreign")>-1 &&
-    // first.getTitle().indexOf("earned")>-1);
-
-    HitBase second = res.get(1);
-    assertTrue(second.getGenerWithQueryScore() > 1.9);
-    // assertTrue(second.getTitle().indexOf("living abroad")>-1);
-
-  }
-
-  public void testSearchOrder2() {
-    List<HitBase> res = proc
-        .runSearch("Can I estimate what my income tax would be by using my last pay");
-
-    System.out.println(res);
-    HitBase first = res.get(0);
-    assertTrue(first.getGenerWithQueryScore() > 1.9);
-
-    HitBase second = res.get(1);
-    assertTrue(second.getGenerWithQueryScore() > 1.9);
-
-  }
+public class SearchResultsProcessorTest extends TestCase{
+	SearchResultsProcessor proc = new SearchResultsProcessor();
+	
+	
+	public void testSearchOrder(){
+		List<HitBase> res = proc.runSearch("How can I pay tax on my income abroad"); 
+		
+		// we verify that top answers have high similarity score
+		System.out.println(res);
+		HitBase first = res.get(0);
+		assertTrue( first.getGenerWithQueryScore()>3.0);
+		//assertTrue(first.getTitle().indexOf("Foreign")>-1 && first.getTitle().indexOf("earned")>-1);
+		
+		HitBase second = res.get(1);
+		assertTrue( second.getGenerWithQueryScore()>1.9);
+		//assertTrue(second.getTitle().indexOf("living abroad")>-1);
+				
+	}
+	
+	public void testSearchOrder2(){
+		List<HitBase> res = proc.runSearch(
+	   "Can I estimate what my income tax would be by using my last pay"); 
+		
+		System.out.println(res);
+		HitBase first = res.get(0);
+		assertTrue( first.getGenerWithQueryScore()>1.9);
+		
+		HitBase second = res.get(1);
+		assertTrue( second.getGenerWithQueryScore()>1.9);
+				
+	}
 }

Added: incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessorTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessorTest.java?rev=1233060&view=auto
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessorTest.java (added)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessorTest.java Wed Jan 18 20:50:04 2012
@@ -0,0 +1,48 @@
+/*
+
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.similarity.apps;
+
+import java.util.Arrays;
+import java.util.List;
+import junit.framework.TestCase;
+import opennlp.tools.similarity.apps.SpeechRecognitionResultsProcessor;
+import opennlp.tools.similarity.apps.SpeechRecognitionResultsProcessor.SentenceMeaningfullnessScore;
+
+public class SpeechRecognitionResultsProcessorTest extends TestCase {
+
+	public void testRestaurantEntityInSpeechRecognitionResults(){
+		 SpeechRecognitionResultsProcessor proc = new  SpeechRecognitionResultsProcessor();
+		 List<SentenceMeaningfullnessScore> res = proc.runSearchAndScoreMeaningfulness( Arrays.asList(new String[]{
+				 "remember to buy milk tomorrow for details",
+				 "remember to buy milk tomorrow from trader joes",
+				 "remember to buy milk tomorrow from 3 to jones",
+				 "remember to buy milk tomorrow for for details",
+				 "remember to buy milk tomorrow from third to joes",
+				 "remember to buy milk tomorrow from third to jones",
+				 "remember to buy milk tomorrow from for d jones"
+		 }));
+		 
+		 assertTrue(res.get(1).getScore()> res.get(0).getScore()  && res.get(1).getScore()> res.get(2).getScore()  &&
+				 res.get(1).getScore()> res.get(3).getScore()  && res.get(1).getScore()> res.get(4).getScore()  &&
+				 res.get(1).getScore()> res.get(5).getScore()  && res.get(1).getScore()> res.get(6).getScore()  
+				 );
+		 
+	 }
+
+}

Modified: incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/SyntMatcherTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/SyntMatcherTest.java?rev=1233060&r1=1233059&r2=1233060&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/SyntMatcherTest.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/SyntMatcherTest.java Wed Jan 18 20:50:04 2012
@@ -1,4 +1,5 @@
 /*
+
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.

Modified: incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessorTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessorTest.java?rev=1233060&r1=1233059&r2=1233060&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessorTest.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessorTest.java Wed Jan 18 20:50:04 2012
@@ -1,20 +1,3 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 package opennlp.tools.textsimilarity.chunker2matcher;
 
 import java.util.List;
@@ -22,61 +5,86 @@ import java.util.List;
 import junit.framework.TestCase;
 
 import opennlp.tools.textsimilarity.ParseTreeChunk;
+import opennlp.tools.textsimilarity.ParseTreeChunkListScorer;
+import opennlp.tools.textsimilarity.TextSimilarityBagOfWords;
 
-public class ParserChunker2MatcherProcessorTest extends TestCase {
-  ParserChunker2MatcherProcessor parser = ParserChunker2MatcherProcessor
-      .getInstance();
-
-  public void testGroupedPhrasesFormer() {
-
-    String text = "Where do I apply? Go to your town office or city hall. If your town doesn't have an office, ask the town clerk or a Selectman. Tell them that you need a 1040 tax form . I Can 't Pay the Taxes on my House: What Can I Do?. Pine Tree Legal";
-
-    List<List<ParseTreeChunk>> res = parser
-        .formGroupedPhrasesFromChunksForPara(text);
-    System.out.println(res);
-    assertEquals(
-        "[[NP [PRP$-your NN-town NN-office CC-or NN-city NN-hall ], NP [PRP$-your NN-town NN-doesn NN-t ], NP [DT-an NN-office ], NP [DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], NP [DT-a NNP-Selectman ], NP [PRP-them IN-that PRP-you ], NP [PRP-you ], NP [DT-a CD-1040 NN-tax NN-form ], NP [PRP-I ], NP [DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [WP-What MD-Can PRP-I ], NP [PRP-I ], NP [NNP-Pine NNP-Tree NNP-Legal ]], [VP [VBP-do RB-I VB-apply ], VP [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], VP [VBP-have DT-an NN-office ], VP [VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], VP [VB-Tell PRP-them IN-that PRP-you ], VP [VBP-need DT-a CD-1040 NN-tax NN-form ], VP [MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], VP [VB-Do NNP-Pine NNP-Tree NNP-Legal ]], [PP [TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], PP [IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ]], [], [SENTENCE [WRB-Where VBP-do RB-I VB-apply ], SENTENCE [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], SENTENCE [IN-If PRP$-your NN-town NN-doesn NN-t VBP-have DT-an NN-office VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], SENTENCE [VB-Tell PRP-them IN-that PRP-you VBP-need DT-a CD-1040 NN-tax NN-form ], SENTENCE [PRP-I MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I VB-Do NNP-Pine NNP-Tree NNP-Legal ]]]",
-        res.toString());
-
-    res = parser
-        .formGroupedPhrasesFromChunksForSentence("How can I get short focus zoom lens for digital camera");
-    assertEquals(
-        "[[NP [PRP-I ], NP [JJ-short NN-focus NN-zoom NN-lens IN-for JJ-digital NN-camera ], NP [JJ-digital NN-camera ]], [VP [VB-get JJ-short NN-focus NN-zoom NN-lens IN-for JJ-digital NN-camera ]], [PP [IN-for JJ-digital NN-camera ]], [], [SENTENCE [WRB-How MD-can PRP-I VB-get JJ-short NN-focus NN-zoom NN-lens IN-for JJ-digital NN-camera ]]]",
-        res.toString());
-
-    res = parser
-        .formGroupedPhrasesFromChunksForSentence("Its classy design and the Mercedes name make it a very cool vehicle to drive. ");
-    assertEquals(
-        "[[NP [PRP$-Its JJ-classy NN-design CC-and DT-the NNP-Mercedes NN-name ], NP [DT-the NNP-Mercedes NN-name ], NP [PRP-it DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ], NP [DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ], NP [NN-drive ]], [VP [VBP-make PRP-it DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ]], [PP [TO-to NN-drive ]], [], [SENTENCE [PRP$-Its JJ-classy NN-design CC-and DT-the NNP-Mercedes NN-name VBP-make PRP-it DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ]]]",
-        res.toString());
-    res = parser
-        .formGroupedPhrasesFromChunksForSentence("Sounds too good to be true but it actually is, the world's first flying car is finally here. ");
-    assertEquals(
-        "[[NP [PRP-it RB-actually ], NP [DT-the NN-world NNS-s JJ-first NN-flying NN-car ]], [VP [VBZ-Sounds RB-too JJ-good ], VP [TO-to VB-be JJ-true CC-but PRP-it RB-actually ], VP [VBZ-is DT-the NN-world NNS-s JJ-first NN-flying NN-car ], VP [VBZ-is RB-finally RB-here ]], [], [ADJP [RB-too JJ-good ], ADJP [JJ-true CC-but PRP-it RB-actually ]], [SENTENCE [VBZ-Sounds RB-too JJ-good TO-to VB-be JJ-true CC-but PRP-it RB-actually VBZ-is DT-the NN-world NNS-s JJ-first NN-flying NN-car VBZ-is RB-finally RB-here ]]]",
-        res.toString());
-    res = parser
-        .formGroupedPhrasesFromChunksForSentence("UN Ambassador Ron Prosor repeated the Israeli position that the only way the Palestinians will get UN membership and statehood is through direct negotiations with the Israelis on a comprehensive peace agreement");
-    assertEquals(
-        "[[NP [NNP-UN NNP-Ambassador NNP-Ron NNP-Prosor ], NP [DT-the JJ-Israeli NN-position IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians ], NP [DT-the JJ-only NN-way DT-the NNPS-Palestinians ], NP [DT-the NNPS-Palestinians ], NP [NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], NP [JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], NP [DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], NP [DT-a JJ-comprehensive NN-peace NN-agreement ]], [VP [VBD-repeated DT-the JJ-Israeli NN-position IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians ], VP [MD-will VB-get IN-UN NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ]], [PP [IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians ], PP [IN-UN NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], PP [IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], PP [IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], PP [IN-on DT-a JJ-comprehensive NN-peace NN-agreement ]], [], [SENTENCE [NNP-UN NNP-Ambassador NNP-Ron NNP-Prosor VBD-repeated DT-the JJ-Israeli NN-position IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians MD-will VB-get IN-UN NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ]]]",
-        res.toString());
-  }
-
-  public void testPrintParseTree() {
-    parser
-        .printParseTree("How can I get short focus zoom lens for digital camera");
-  }
-
-  public void testRelevanceAssessm() {
-    String phrase1 = "Its classy design and the Mercedes name make it a very cool vehicle to drive. "
-        + "The engine makes it a powerful car. "
-        + "The strong engine gives it enough power. "
-        + "The strong engine gives the car a lot of power.";
-    String phrase2 = "This car has a great engine. "
-        + "This car has an amazingly good engine. "
-        + "This car provides you a very good mileage.";
-    String sentence = "Not to worry with the 2cv.";
-
-    System.out.println(parser.assessRelevance(phrase1, phrase2));
+public class ParserChunker2MatcherProcessorTest extends TestCase{
+	private ParserChunker2MatcherProcessor parser = ParserChunker2MatcherProcessor.getInstance();
+	private TextSimilarityBagOfWords parserBOW = new TextSimilarityBagOfWords ();
+	private ParseTreeChunkListScorer parseTreeChunkListScorer = new ParseTreeChunkListScorer();
+
+	public void testGroupedPhrasesFormer(){
+
+		String text = "Where do I apply? Go to your town office or city hall. If your town doesn't have an office, ask the town clerk or a Selectman. Tell them that you need a 1040 tax form . I Can 't Pay the Taxes on my House: What Can I Do?. Pine Tree Legal";
+
+
+
+		List<List<ParseTreeChunk>> res = parser.formGroupedPhrasesFromChunksForPara(text);
+		System.out.println(res);
+		assertEquals(
+				"[[NP [PRP$-your NN-town NN-office CC-or NN-city NN-hall ], NP [PRP$-your NN-town NN-doesn NN-t ], NP [DT-an NN-office ], NP [DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], NP [DT-a NNP-Selectman ], NP [PRP-them IN-that PRP-you ], NP [PRP-you ], NP [DT-a CD-1040 NN-tax NN-form ], NP [PRP-I ], NP [DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [PRP$-my NNP-House WP-What MD-Can PRP-I ], NP [WP-What MD-Can PRP-I ], NP [PRP-I ], NP [NNP-Pine NNP-Tree NNP-Legal ]], [VP [VBP-do RB-I VB-apply ], VP [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], VP [VBP-have DT-an NN-office ], VP [VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], VP [VB-Tell PRP-them IN-that PRP-you ], VP [VBP-need DT-a CD-1040 NN-tax NN-form ], VP [MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ], VP [VB-Do NNP-Pine NNP-Tree NNP-Legal ]], [PP [TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], PP [IN-on PRP$-my NNP-House WP-What MD-Can PRP-I ]], [], [SENTENCE [WRB-Where VBP-do RB-I VB-apply ], SENTENCE [VB-Go TO-to PRP$-your NN-town NN-office CC-or NN-city NN-hall ], SENTENCE [IN-If PRP$-your NN-town NN-doesn NN-t VBP-have DT-an NN-office VB-ask DT-the NN-town NN-clerk CC-or DT-a NNP-Selectman ], SENTENCE [VB-Tell PRP-them IN-that PRP-you VBP-need DT-a CD-1040 NN-tax NN-form ], SENTENCE [PRP-I MD-Can VB-t VB-Pay DT-the NNS-Taxes IN-on PRP$-my NNP-House WP-What MD-Can PRP-I VB-Do NNP-Pine NNP-Tree NNP-Legal ]]]",
+				res.toString());
+
+		res = parser.formGroupedPhrasesFromChunksForSentence("How can I get short focus zoom lens for digital camera");
+		assertEquals(
+				"[[NP [PRP-I ], NP [JJ-short NN-focus NN-zoom NN-lens IN-for JJ-digital NN-camera ], NP [JJ-digital NN-camera ]], [VP [VB-get JJ-short NN-focus NN-zoom NN-lens IN-for JJ-digital NN-camera ]], [PP [IN-for JJ-digital NN-camera ]], [], [SENTENCE [WRB-How MD-can PRP-I VB-get JJ-short NN-focus NN-zoom NN-lens IN-for JJ-digital NN-camera ]]]", 
+				res.toString());
+
+		res = parser.formGroupedPhrasesFromChunksForSentence("Its classy design and the Mercedes name make it a very cool vehicle to drive. ");
+		assertEquals(
+				"[[NP [PRP$-Its JJ-classy NN-design CC-and DT-the NNP-Mercedes NN-name ], NP [DT-the NNP-Mercedes NN-name ], NP [PRP-it DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ], NP [DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ], NP [NN-drive ]], [VP [VBP-make PRP-it DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ]], [PP [TO-to NN-drive ]], [], [SENTENCE [PRP$-Its JJ-classy NN-design CC-and DT-the NNP-Mercedes NN-name VBP-make PRP-it DT-a RB-very JJ-cool NN-vehicle TO-to NN-drive ]]]",
+				res.toString());
+		res = parser.formGroupedPhrasesFromChunksForSentence("Sounds too good to be true but it actually is, the world's first flying car is finally here. ");
+		assertEquals(
+				"[[NP [PRP-it RB-actually ], NP [DT-the NN-world NNS-s JJ-first NN-flying NN-car ]], [VP [VBZ-Sounds RB-too JJ-good ], VP [TO-to VB-be JJ-true CC-but PRP-it RB-actually ], VP [VBZ-is DT-the NN-world NNS-s JJ-first NN-flying NN-car ], VP [VBZ-is RB-finally RB-here ]], [], [ADJP [RB-too JJ-good ], ADJP [JJ-true CC-but PRP-it RB-actually ]], [SENTENCE [VBZ-Sounds RB-too JJ-good TO-to VB-be JJ-true CC-but PRP-it RB-actually VBZ-is DT-the NN-world NNS-s JJ-first NN-flying NN-car VBZ-is RB-finally RB-here ]]]",
+				res.toString());
+		res = parser.formGroupedPhrasesFromChunksForSentence("UN Ambassador Ron Prosor repeated the Israeli position that the only way the Palestinians will get UN membership and statehood is through direct negotiations with the Israelis on a comprehensive peace agreement");
+		assertEquals(
+				"[[NP [NNP-UN NNP-Ambassador NNP-Ron NNP-Prosor ], NP [DT-the JJ-Israeli NN-position IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians ], NP [DT-the JJ-only NN-way DT-the NNPS-Palestinians ], NP [DT-the NNPS-Palestinians ], NP [NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], NP [JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], NP [DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], NP [DT-a JJ-comprehensive NN-peace NN-agreement ]], [VP [VBD-repeated DT-the JJ-Israeli NN-position IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians ], VP [MD-will VB-get IN-UN NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ]], [PP [IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians ], PP [IN-UN NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], PP [IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], PP [IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ], PP [IN-on DT-a JJ-comprehensive NN-peace NN-agreement ]], [], [SENTENCE [NNP-UN NNP-Ambassador NNP-Ron NNP-Prosor VBD-repeated DT-the JJ-Israeli NN-position IN-that DT-the JJ-only NN-way DT-the NNPS-Palestinians MD-will VB-get IN-UN NN-membership CC-and NN-statehood VBZ-is IN-through JJ-direct NNS-negotiations IN-with DT-the NNP-Israelis IN-on DT-a JJ-comprehensive NN-peace NN-agreement ]]]",
+				res.toString());
+	}
+
+	public void testPrintParseTree(){
+		parser.printParseTree("How can I get short focus zoom lens for digital camera");
+	}
+
+	public void testRelevanceAssessm(){
+		String phrase1 = "Its classy design and the Mercedes name make it a very cool vehicle to drive. "
+			+ "The engine makes it a powerful car. "
+			+ "The strong engine gives it enough power. "
+			+ "The strong engine gives the car a lot of power.";
+		String phrase2 = "This car has a great engine. "
+			+ "This car has an amazingly good engine. "
+			+ "This car provides you a very good mileage.";
+
+		System.out.println(parser.assessRelevance(phrase1, phrase2).getMatchResult());
+
+	}
+
+	public void testCompareRelevanceAssessmWithBagOfWords(){
+		// we first demonstrate how similarity expression for DIFFERENT cases have too high score for bagOfWords
+		String phrase1 = "How to deduct rental expense from income ";
+		String phrase2 = "How to deduct repair expense from rental income.";
+		List<List<ParseTreeChunk>> matchResult  = parser.assessRelevance(phrase1, phrase2).getMatchResult();
+		assertEquals(matchResult.toString(), 
+				"[[ [NN-expense IN-from NN-income ],  [JJ-rental NN-* ],  [NN-income ]], [ [TO-to VB-deduct JJ-rental NN-* ],  [VB-deduct NN-expense IN-from NN-income ]]]"); 
+		System.out.println(matchResult);
+		double matchScore = parseTreeChunkListScorer.getParseTreeChunkListScore(matchResult);
+		double bagOfWordsScore = parserBOW.assessRelevanceAndGetScore(phrase1, phrase2);
+		assertTrue(matchScore+2 < bagOfWordsScore);
+		System.out.println("MatchScore is adequate ( = "+matchScore + ") and bagOfWordsScore = "+bagOfWordsScore+" is too high");
+
+		// we now demonstrate how similarity can be captured by POS and cannot be captured by bagOfWords
+		phrase1 = "Way to minimize medical expense for my daughter";
+		phrase2 = "Means to deduct educational expense for my son";
+		matchResult  = parser.assessRelevance(phrase1, phrase2).getMatchResult();
+		assertEquals(matchResult.toString(), 
+			"[[ [JJ-* NN-expense IN-for PRP$-my NN-* ],  [PRP$-my NN-* ]], [ [TO-to VB-* JJ-* NN-expense IN-for PRP$-my NN-* ]]]"); 
+		System.out.println(matchResult);
+		matchScore = parseTreeChunkListScorer.getParseTreeChunkListScore(matchResult);
+		bagOfWordsScore = parserBOW.assessRelevanceAndGetScore(phrase1, phrase2);
+		assertTrue(matchScore > 2*bagOfWordsScore);
+		System.out.println("MatchScore is adequate ( = "+matchScore + ") and bagOfWordsScore = "+bagOfWordsScore+" is too low");
 
-  }
+	}
 }

Modified: incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNodeTest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNodeTest.java?rev=1233060&r1=1233059&r2=1233060&view=diff
==============================================================================
--- incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNodeTest.java (original)
+++ incubator/opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/textsimilarity/chunker2matcher/PhraseNodeTest.java Wed Jan 18 20:50:04 2012
@@ -1,44 +1,25 @@
 package opennlp.tools.textsimilarity.chunker2matcher;
 
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
 import java.util.List;
 
 import junit.framework.TestCase;
 
-public class PhraseNodeTest extends TestCase {
-  ParserChunker2MatcherProcessor proc = ParserChunker2MatcherProcessor
-      .getInstance();
-
-  public void testPOSTagsExtraction() {
-
-    SentenceNode node = proc.parseSentenceNode("How can I get there");
-    List<String> pOSlist = node.getOrderedPOSList();
-    assertEquals("[WRB, MD, PRP, VB, RB]", pOSlist.toString());
-
-    node = proc.parseSentenceNode("where do I apply");
-    pOSlist = node.getOrderedPOSList();
-    assertEquals("[WRB, VBP, PRP, RB]", pOSlist.toString());
-
-    // should NOT start with upper case!
-    node = proc.parseSentenceNode("Where do I apply");
-    pOSlist = node.getOrderedPOSList();
-    assertEquals("[WRB, VBP, PRP]", pOSlist.toString());
-  }
-
+public class PhraseNodeTest extends TestCase{
+	ParserChunker2MatcherProcessor proc = ParserChunker2MatcherProcessor.getInstance();
+    public void testPOSTagsExtraction(){
+    	
+    	SentenceNode node  = proc.parseSentenceNode("How can I get there");
+		List<String> pOSlist = node.getOrderedPOSList();
+		assertEquals("[WRB, MD, PRP, VB, RB]", pOSlist.toString());
+		
+		node  = proc.parseSentenceNode("where do I apply");
+		pOSlist = node.getOrderedPOSList();
+		assertEquals("[WRB, VBP, PRP, RB]", pOSlist.toString());
+		
+		// should NOT start with upper case! last tag is missing
+		node  = proc.parseSentenceNode("Where do I apply");
+		pOSlist = node.getOrderedPOSList();
+		assertEquals("[WRB, VBP, PRP]", pOSlist.toString());
+    }
+    	
 }