You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by bg...@apache.org on 2014/01/06 18:48:32 UTC
svn commit: r1555944 [1/11] - in /opennlp/sandbox/opennlp-similarity/src:
main/java/opennlp/tools/apps/ main/java/opennlp/tools/apps/contentgen/
main/java/opennlp/tools/apps/contentgen/multithreaded/
main/java/opennlp/tools/apps/relevanceVocabs/ main/j...
Author: bgalitsky
Date: Mon Jan 6 17:48:30 2014
New Revision: 1555944
URL: http://svn.apache.org/r1555944
Log:
OPENNLP-628
Added:
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ParserConstants.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/FeatureSpaceCoverageProcessor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/IntersectionSetBuilder.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/ProfileReaderWriter.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/TreeKernelRunner.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ArcType.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/IGeneralizer.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/PTTree.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/Pair.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseCorefsBuilder.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseThicket.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseTreeNode.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/Triple.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/WordWordInterSentenceRelationArc.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/BingQueryRunnerMultipageSearchResults.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MinedSentenceProcessor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MostFrequentWordsFromPageGetter.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/SnippetToParagraph.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageContentSentenceExtractor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/communicative_actions/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilder.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsAttribute.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceExtendedForestSearchResultsProcessorSetFormer.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedExtendedForestSearchResultsProcessor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedSearchResultsProcessor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/PT2ExtendedTreeForestBuilder.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/SnippetToParagraphFull.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeExtenderByAnotherLinkedTree.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/GeneralizationListReducer.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaFormManager.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/Matcher.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilder.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreeChunkListScorer.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePath.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathComparable.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathMatcher.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathMatcherDeterministic.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhraseConcept.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructure.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorRunner.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/YahooAnswersMiner.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/ContentGeneratorRequestHandler.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeSearchRequestHandler.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/NLProgram2CodeRequestHandler.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerRequestHandler.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/cgRequestForm.html
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/solrconfig.xml
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessorTest.java
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/StoryDiscourseNavigatorTest.java
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/kernel_interface/
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTMatcherTest.java
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTPhraseBuilderTest.java
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PairwiseMatcherTest.java
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/parse_thicket2graph/
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructureTest.java
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/rhetoric_structure/
opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarkerTest.java
opennlp/sandbox/opennlp-similarity/src/test/resources/tree_kernel/
opennlp/sandbox/opennlp-similarity/src/test/resources/tree_kernel/tree_kernel.zip (with props)
Modified:
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBase.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBaseComparable.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SearchResultsProcessor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessor.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java
opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,54 @@
+package opennlp.tools.apps.contentgen.multithreaded;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.similarity.apps.BingQueryRunner;
+import opennlp.tools.similarity.apps.HitBase;
+
+/**
+ * Runnable form of BingQueryRunner: runs one search for a fixed query, keeps the result, and notifies listeners.
+ */
+public class BingWebQueryRunnerThread extends BingQueryRunner implements Runnable {
+ /** Listener registry edited by addMyEventListener / removeMyEventListener. */
+ protected javax.swing.event.EventListenerList listenerList = new javax.swing.event.EventListenerList();
+ private String query;
+ private List<HitBase> results = new ArrayList<HitBase>();
+
+ /** @param Query search string passed to runSearch when run() executes */
+ public BingWebQueryRunnerThread(String Query) {
+   query = Query;
+ }
+
+ /** Runs the search, stores its result, then fires a MyEvent whose source is this object. */
+ public void run() {
+   results = runSearch(query);
+   fireMyEvent(new MyEvent(this));
+ }
+
+ /** @return whatever the last run() received from runSearch; an empty list until then */
+ public List<HitBase> getResults() { return results; }
+
+ /** @return the query string given to the constructor */
+ public String getQuery() { return query; }
+
+ /** Registers a listener to be called once run() has finished its search. */
+ public void addMyEventListener(MyEventListener listener) {
+   listenerList.add(MyEventListener.class, listener);
+ }
+
+ /** Unregisters a listener previously passed to addMyEventListener. */
+ public void removeMyEventListener(MyEventListener listener) {
+   listenerList.remove(MyEventListener.class, listener);
+ }
+
+ /** Calls MyEvent(evt) on each MyEventListener; the array alternates listener class and instance. */
+ void fireMyEvent(MyEvent evt) {
+   Object[] pairs = listenerList.getListenerList();
+   for (int slot = 1; slot < pairs.length; slot += 2) {
+     if (pairs[slot - 1] == MyEventListener.class) {
+       ((MyEventListener) pairs[slot]).MyEvent(evt);
+     }
+   }
+ }
+}
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,88 @@
+package opennlp.tools.apps.contentgen.multithreaded;
+
+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;
+
+
+/**
+ * Holds a result text together with its score, the original fragment and a source URL.
+ * Equality is similarity-based (see equals), so it is not guaranteed to be transitive.
+ */
+public class Fragment {
+
+ public String resultText; // result
+ public double score;
+ public String fragment; // original
+ public String sourceURL;
+
+ /** Creates a fragment with the given result text and score; the other fields stay null. */
+ Fragment(String text, double score) {
+   this.resultText = text;
+   this.score = score;
+ }
+
+ /** @return the result text */
+ public String getResultText() { return resultText; }
+
+ /** @param resultText new result text */
+ public void setResultText(String resultText) {
+   this.resultText = resultText;
+ }
+
+ /** @return the score */
+ public double getScore() { return score; }
+
+ /** @param score new score */
+ public void setScore(double score) {
+   this.score = score;
+ }
+
+ /** @return the original fragment */
+ public String getFragment() { return fragment; }
+
+ /** @param fragment new original fragment */
+ public void setFragment(String fragment) {
+   this.fragment = fragment;
+ }
+
+ /** @return the source URL */
+ public String getSourceURL() { return sourceURL; }
+
+ /** @param sourceURL new source URL */
+ public void setSourceURL(String sourceURL) {
+   this.sourceURL = sourceURL;
+ }
+
+ /** @return the result text, which may be null */
+ @Override
+ public String toString() {
+   return this.resultText;
+ }
+
+ /**
+  * Two fragments are equal when both result texts are null, or when both are non-null and
+  * StringDistanceMeasurer rates the pair above 0.8.
+  */
+ @Override
+ public boolean equals(Object o) {
+   if (this == o) {
+     return true;
+   }
+   if (o == null || getClass() != o.getClass()) {
+     return false;
+   }
+   Fragment other = (Fragment) o;
+   if (resultText == null || other.resultText == null) {
+     return resultText == null && other.resultText == null;
+   }
+   return new StringDistanceMeasurer().measureStringDistance(resultText, other.resultText) > 0.8;
+ }
+
+ /**
+  * Returns a constant: equals is similarity-based, so fragments with different texts can be
+  * equal, and a text-derived hash would give equal objects different hash codes.
+  */
+ @Override
+ public int hashCode() {
+   return 0;
+ }
+}
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,12 @@
+package opennlp.tools.apps.contentgen.multithreaded;
+
+import java.util.EventObject;
+
+/** Event with no payload of its own; it only carries the source object given at construction. */
+public class MyEvent extends EventObject {
+
+ /** @param arg0 object reported as the event source */
+ public MyEvent(Object arg0) {
+   super(arg0);
+ }
+}
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,8 @@
+package opennlp.tools.apps.contentgen.multithreaded;
+
+import java.util.EventListener;
+
+
+public interface MyEventListener extends EventListener { // callback contract for MyEvent notifications
+ void MyEvent(MyEvent evt); // called with the event being delivered
+}
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html Mon Jan 6 17:48:30 2014
@@ -0,0 +1,37 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+
+<html xmlns='http://www.w3.org/1999/xhtml'>
+ <head >
+ <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>
+ <title >Submit Your Essay Writing request here</title>
+ </head>
+<body>
+<h1>Submit Your Essay Writing request here / Envie su solicitud ensayo escrito aqui</h1>
+
+<form id='sampleform' method='post' action='http://173.255.254.250:8983/solr/contentgen/?resourceDir=/home/solr/solr-4.4.0/example/src/test/resources&workDir=/home/solr/solr-4.4.0/example/solr-webapp/webapp/WEB-INF/lib&relevanceThreshold=0.5&bingKey=e8ADxIjn9YyHx36EihdjH/tMqJJItUrrbPTUpKahiU0=' >
+ <p>
+ Topic for your essay/Tema de su ensayo: <input type='text' name='q' value='albert einstein' size='35' maxlength='100'/>
+ </p>
+ <p>
+ Email to receive your essay/para recibir su ensayo: <input type='text' name='email' />
+ </p>
+
+ <p>
+ Select language/seleccionar el idioma: <select name="lang" >
+ <option value="en-US"> English</option>
+ <option value="es-US"> Espaniol</option>
+ <option value="de-DE"> German</option>
+ </select>
+ </p>
+ <p>
+ Number of Bing calls to write this essay: <input type='text' name='stepsNum' value='20' size='5' maxlength='10'/>
+ Number of Bing search results for each call to use for writing: <input type='text' name='searchResultsNum' value='100' size='5' maxlength='10'/>
+ </p>
+<p>
+ <input type='submit' name='Submit' value='Submit/presentar' />
+ </p>
+</form>
+
+</body>
+</html>
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html Mon Jan 6 17:48:30 2014
@@ -0,0 +1,47 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+
+<html xmlns='http://www.w3.org/1999/xhtml'>
+ <head >
+ <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>
+ <title >Submit Your Code Writing request here</title>
+ </head>
+<body>
+<h1>Submit Your Code Writing request here</h1>
+
+<form id='sampleform' method='post' action='http://173.255.254.250:8983/solr/nlprog2code/?' >
+ <p>
+ Write what you want your program to do in natural language <input type='text' name='line' value='define a class named ...' size='35' maxlength='120'/>
+ </p>
+ <p>
+ <input type='text' name='line' value='define a function taking a string s1 and an integer i2 ' size='35' maxlength='150'/>
+ </p>
+ <p>
+ <input type='text' name='line' size='35' maxlength='200'/>
+ </p>
+ <p>
+ <input type='text' name='line' size='35' maxlength='200'/>
+ </p>
+ <p>
+ <input type='text' name='line' size='35' maxlength='200'/>
+ </p>
+ <p>
+ <input type='text' name='line' size='35' maxlength='200'/>
+ </p>
+ <p>
+ <input type='text' name='line' size='35' maxlength='200'/>
+ </p>
+ <p>
+ <input type='text' name='line' size='35' maxlength='200'/>
+ </p>
+ <p>
+ <input type='text' name='line' size='35' maxlength='200'/>
+ </p>
+
+<p>
+ <input type='submit' name='Submit' value='Submit' />
+ </p>
+</form>
+
+</body>
+</html>
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,74 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+/** POS type names as String constants (interface fields are implicitly public static final). */
+public interface POStags {
+ // additional POS types for infinitive phrase and participle phrases
+ String TYPE_STP = "STP"; // infinitive phrase
+ String TYPE_SGP = "SGP"; // present participle phrase
+ String TYPE_SNP = "SNP"; // past participle phrase
+
+ // standard POS types, see http://bulba.sdsu.edu/jeanette/thesis/PennTags.html
+ String TYPE_ADJP = "ADJP";
+ String TYPE_ADVP = "ADVP";
+ String TYPE_CC = "CC";
+ String TYPE_CD = "CD";
+ String TYPE_CONJP = "CONJP";
+ String TYPE_DT = "DT";
+ String TYPE_EX = "EX";
+ String TYPE_FRAG = "FRAG";
+ String TYPE_FW = "FW";
+ String TYPE_IN = "IN";
+ String TYPE_INTJ = "INTJ";
+ String TYPE_JJ = "JJ";
+ String TYPE_JJR = "JJR";
+ String TYPE_JJS = "JJS";
+ String TYPE_LS = "LS";
+ String TYPE_LST = "LST";
+ String TYPE_MD = "MD";
+ String TYPE_NAC = "NAC";
+ String TYPE_NN = "NN";
+ String TYPE_NNS = "NNS";
+ String TYPE_NNP = "NNP";
+ String TYPE_NNPS = "NNPS";
+ String TYPE_NP = "NP";
+ String TYPE_NX = "NX";
+ String TYPE_PDT = "PDT";
+ String TYPE_POS = "POS";
+ String TYPE_PP = "PP";
+ String TYPE_PRN = "PRN";
+ String TYPE_PRP = "PRP";
+ String TYPE_PRP$ = "PRP$";
+ String TYPE_PRT = "PRT";
+ String TYPE_QP = "QP";
+ String TYPE_RB = "RB";
+ String TYPE_RBR = "RBR";
+ String TYPE_RBS = "RBS";
+ String TYPE_RP = "RP";
+ String TYPE_RRC = "RRC";
+ String TYPE_S = "S";
+ String TYPE_SBAR = "SBAR";
+ String TYPE_SBARQ = "SBARQ";
+ String TYPE_SINV = "SINV";
+ String TYPE_SQ = "SQ";
+ String TYPE_SYM = "SYM";
+ String TYPE_TO = "TO";
+ String TYPE_TOP = "TOP";
+ String TYPE_UCP = "UCP";
+ String TYPE_UH = "UH";
+ String TYPE_VB = "VB";
+ String TYPE_VBD = "VBD";
+ String TYPE_VBG = "VBG";
+ String TYPE_VBN = "VBN";
+ String TYPE_VBP = "VBP";
+ String TYPE_VBZ = "VBZ";
+ String TYPE_VP = "VP";
+ String TYPE_WDT = "WDT";
+ String TYPE_WHADJP = "WHADJP";
+ String TYPE_WHADVP = "WHADVP";
+ String TYPE_WHNP = "WHNP";
+ String TYPE_WHPP = "WHPP";
+ String TYPE_WP = "WP";
+ String TYPE_WP$ = "WP$";
+ String TYPE_WRB = "WRB";
+ String TYPE_X = "X";
+}
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,215 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+
+import opennlp.tools.parser.Parse;
+import opennlp.tools.textsimilarity.ParseTreeChunk;
+import opennlp.tools.textsimilarity.TextProcessor;
+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
+import opennlp.tools.util.Span;
+
+public class PhraseProcessor {
+
+ private ParserChunker2MatcherProcessor nlProc = ParserChunker2MatcherProcessor.getInstance() ;
+
+ /**
+  * Tells whether every direct child of the given parse node is a POS-tag node.
+  *
+  * @param p the parse node whose children are inspected
+  * @return {@code false} as soon as one child is not a POS tag, {@code true}
+  *         otherwise (including when there are no children)
+  */
+ public static boolean allChildNodesArePOSTags(Parse p) {
+   for (Parse child : p.getChildren()) {
+     if (!child.isPosTag()) {
+       return false;
+     }
+   }
+   return true;
+ }
+
+ public ArrayList<String> getNounPhrases(Parse p)
+ {
+ ArrayList<String> nounphrases = new ArrayList<String>();
+
+ Parse[] subparses = p.getChildren();
+ for (int pi = 0; pi < subparses.length; pi++)
+ {
+
+ if (subparses[pi].getType().equals("NP") && allChildNodesArePOSTags(subparses[pi]))
+ {
+ Span _span = subparses[pi].getSpan();
+ nounphrases.add(p.getText().substring(_span.getStart(), _span.getEnd()));
+ }
+ else if (!((Parse) subparses[pi]).isPosTag())
+ nounphrases.addAll(getNounPhrases(subparses[pi]));
+ }
+
+ return nounphrases;
+ }
+
+ public ArrayList<String> getVerbPhrases(Parse p)
+ {
+ ArrayList<String> verbPhrases = new ArrayList<String>();
+
+ Parse[] subparses = p.getChildren();
+ for (int pi = 0; pi < subparses.length; pi++)
+ {
+
+ if (subparses[pi].getType().startsWith("VB") && allChildNodesArePOSTags(subparses[pi]))
+ {
+ Span _span = subparses[pi].getSpan();
+ verbPhrases.add(p.getText().substring(_span.getStart(), _span.getEnd()));
+ }
+ else if (!((Parse) subparses[pi]).isPosTag())
+ verbPhrases.addAll(getNounPhrases(subparses[pi]));
+ }
+
+ return verbPhrases;
+ }
+
+ // forms phrases from text which are candidate expressions for events lookup
+ /**
+  * Parses a sentence and returns the second phrase group produced by the chunker.
+  *
+  * @param sentence the text to parse
+  * @return the group at index 1 (presumably the verb phrases - confirm against
+  *         the chunker), or {@code null} when the sentence is {@code null}, is a
+  *         single word, is longer than 100 characters, or fewer than two groups
+  *         were produced
+  */
+ public List<ParseTreeChunk> getVerbPhrases(String sentence) {
+   if (sentence == null) {
+     return null;
+   }
+   if (sentence.split(" ").length == 1) { // a single word cannot form a phrase
+     return null;
+   }
+   if (sentence.length() > 100) {
+     return null; // too long of a sentence to parse
+   }
+
+   System.out.println("About to parse: "+sentence);
+   List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence);
+   // index 1 is read below, so at least two groups must be present
+   if (groupedChunks.size() < 2) {
+     return null;
+   }
+   return groupedChunks.get(1);
+ }
+
+ public List<List<ParseTreeChunk>> getPhrasesOfAllTypes(String sentence) {
+ if (sentence==null)
+ return null;
+ if (sentence.split(" ").length ==1) { // this is a word, return empty
+ //queryArrayStr.add( sentence);
+ return null;
+ }
+ if (sentence.length()>200)
+ return null ; // too long of a sentence to parse
+
+ System.out.println("About to parse: "+sentence);
+ List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence);
+ if (groupedChunks.size()<1)
+ return null;
+
+ return groupedChunks;
+ }
+
+ // forms phrases from text which are candidate expressions for events lookup
+ public List<String> extractNounPhraseProductNameCandidate(String sentence) {
+
+ List<String> queryArrayStr = new ArrayList<String>();
+
+ if (sentence.split(" ").length ==1) { // this is a word, return empty
+ //queryArrayStr.add( sentence);
+ return queryArrayStr;
+ }
+ String quoted1 = StringUtils.substringBetween(sentence, "\"", "\"");
+ String quoted2 = StringUtils.substringBetween(sentence, "\'", "\'");
+ List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence);
+ if (groupedChunks.size()<1)
+ return queryArrayStr;
+
+ List<ParseTreeChunk> nPhrases = groupedChunks.get(0);
+
+ for (ParseTreeChunk ch : nPhrases) {
+ String query = "";
+ int size = ch.getLemmas().size();
+ boolean phraseBeingFormed = false;
+ for (int i = 0; i < size; i++) {
+ if ((ch.getPOSs().get(i).startsWith("N") || ch.getPOSs().get(i)
+ .startsWith("J") || ch.getPOSs().get(i).startsWith("CD") ) )
+ // && StringUtils.isAlpha(ch.getLemmas().get(i)))
+ {
+ query += ch.getLemmas().get(i) + " ";
+ phraseBeingFormed = true;
+ } else
+ if ((ch.getPOSs().get(i).startsWith("PR") || ch.getPOSs().get(i).startsWith("IN") || ch.getPOSs().get(i).startsWith("TO") )
+ && phraseBeingFormed )
+ break;
+ else if (ch.getPOSs().get(i).startsWith("DT") || ch.getPOSs().get(i).startsWith("CC"))
+ continue;
+ }
+ query = query.trim();
+ int len = query.split(" ").length;
+ if (len > 5 || len < 2) // too long or too short
+ continue;
+
+ /*
+ if (len < 4 && len>1) { // every word should start with capital
+ String[] qs = query.split(" ");
+ boolean bAccept = true;
+ for (String w : qs) {
+ if (w.toLowerCase().equals(w)) // idf only two words then
+ // has to be person name,
+ // title or geo
+ // location
+ bAccept = false;
+ }
+ if (!bAccept)
+ continue;
+ }
+ */
+ // individual word, possibly a frequent word
+ // if len==1 do nothing
+
+ query = query.trim();
+ queryArrayStr.add(query);
+
+ }
+ /*
+ if (queryArrayStr.size() < 1) { // release constraints on NP down to 2
+ // keywords
+ for (ParseTreeChunk ch : nPhrases) {
+ String query = "";
+ int size = ch.getLemmas().size();
+
+ for (int i = 0; i < size; i++) {
+ if (ch.getPOSs().get(i).startsWith("N")
+ || ch.getPOSs().get(i).startsWith("J")) {
+ query += ch.getLemmas().get(i) + " ";
+ }
+ }
+ query = query.trim();
+ int len = query.split(" ").length;
+ if (len < 2)
+ continue;
+
+ query = TextProcessor.fastTokenize(query.toLowerCase(), false)
+ .toString().replace('[', ' ').replace(']', ' ').trim();
+ if (query.length() > 6)
+ queryArrayStr.add(query);
+ }
+ }
+ //queryArrayStr = Utils
+ // .removeDuplicatesFromQueries(queryArrayStr);
+ if (quoted1 != null
+ && ((quoted1.length() > 5 && !stopList.isCommonWord(quoted1)) || quoted1
+ .length() > 10))
+ queryArrayStr.add(quoted1);
+ if (quoted2 != null
+ && ((quoted2.length() > 5 && !stopList.isCommonWord(quoted2)) || quoted2
+ .length() > 10))
+ queryArrayStr.add(quoted2);
+ */ return queryArrayStr;
+ }
+
+
+
+
+ /** Demo entry point: prints the candidate phrases found in a sample headline. */
+ public static void main(String[] args) {
+   // alternative sample:
+   // "The tablet phenomenon turns Silicon Valley upside down - SiliconValley.com"
+   PhraseProcessor processor = new PhraseProcessor();
+   List<String> candidates =
+       processor.extractNounPhraseProductNameCandidate("Appliances and Kitchen Gadgets - CNET Blogs");
+   System.out.println(candidates);
+ }
+}
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,199 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+
+public class SentimentVocab {
+ private static final String[] POSITIVE_ADJECTTIVE_LIST = { "accessible",
+ "advanced", "affordable", "amazing", "awesome", "beautiful",
+ "brilliant", "capable", "classic", "clear", "comfortable",
+ "convenient", "cool", "courteous", "cute", "decent", "delight",
+ "easy", "elegant", "enjoyable", "enough", "excellent",
+ "exceptional", "fabulous", "fancy", "fantastic", "fast",
+ "favorable", "fine", "friendly", "fun", "good", "great", "handy",
+ "happy", "hefty", "helpful", "high", "immaculate", "impressive",
+ "incredible", "interesting", "jealous", "lovely", "lucky",
+ "luxurious", "marvelous", "maximum", "memorable", "neat", "nice",
+ "outstanding", "perfect", "pleasant", "positive", "pretty",
+ "powerful", "quiet", "reasonable", "remarkable", "right", "safe",
+ "silky", "sleek", "slick", "stylish", "suitable", "superb",
+ "tasteful", "terrific", "top", "unbelievable", "useful",
+ "welcoming", "wonderful", "worthwhile" };
+
+ private static final String[] NEGATIVE_ADJECTTIVE_LIST = { "angry",
+ "annoyed", "annoying", "anxious", "arrogant", "ashamed", "awful",
+ "bad", "bored", "boring", "broke", "broken", "clumsy",
+ "complicate", "complicated", "confused", "cranky", "crazy",
+ "cumbersome", "defective", "depressed", "dead", "depressing",
+ "difficult", "dirty", "disappointed", "disappointing", "disgusted",
+ "disgusting", "disheartened", "disheartening", "dissatisfactory",
+ "dissatisfying", "distant", "disturbed", "dizzy", "doubtful",
+ "down", "drab", "dull", "dysfunctional", "embarrassed", "evil",
+ "exhausted", "fatal", "filthy", "flawed", "fragile", "frightened",
+ "frustrating", "goofy", "grieving", "hard", "horrific",
+ "horrifying", "harsh", "horrible", "impossible", "inconvenient",
+ "insane", "lack", "lacking", "lazy", "leaking", "leaky", "lonely",
+ "low", "mediocre", "messy", "mysterious", "nasty", "naughty",
+ "negative", "noisy", "nonclean", "nutty", "outdated", "outrageous",
+ "over priced", "pathetic", "poor", "premature", "pricey", "pricy",
+ "problematic", "putrid", "puzzled", "rickety", "ridiculous",
+ "ripped off", "rugged", "slow", "stinky", "strange", "stupid",
+ "sweaty", "tedious", "terrible", "tired", "tough", "toxic",
+ "trubled", "ugly", "unbearable", "unclean", "uncomfortable",
+ "unfortunate", "unhelpful", "uninviting", "unpleasent",
+ "unsanitary", "upseting", "unusable", "weird", "worn", "worn down",
+ "wretched", "wrong" };
+
+ private static final String[] POSITIVE_ADVERB_LIST = { "absolutely",
+ "amazingly", "completely", "definitely", "easily", "fairly",
+ "highly", "immensely", "incredibly", "nicely", "really", "rich",
+ "simply", "surprisingly", "tastefully", "totally", "truly", "very",
+ "well" };
+
+ private static final String[] NEGATIVE_ADVERB_LIST = { "badly",
+ "deceptfully", "down", "horribly", "oddly", "pathetically",
+ "terribly", "too", "unfortunately" };
+
+ private static final String[] POSITIVE_NOUN_LIST = { "ability", "benefit",
+ "character", "charm", "comfort", "discount", "dream", "elegance",
+ "favourite", "feature", "improvement", "luck", "luxury", "offer",
+ "pro", "quality", "requirement", "usability" };
+
+ private static final String[] NEGATIVE_NOUN_LIST = { "blocker",
+ "challenge", "complain", "complaint", "compromise", "con",
+ "concern", "crap", "disappointment", "disillusion", "doubt",
+ "downside", "drawback", "embarrassment", "error", "failure",
+ "fault", "garbage", "glitch", "inability", "issue", "junk",
+ "long line", "malfunction", "mess", "mistake", "nightmare",
+ "noise", "odor", "pain", "pitfall", "problem", "rip off", "roach",
+ "rude", "sacrifice", "shame", "shock", "stain", "threat",
+ "trouble", "urine", "worry" };
+
+ // verbs registered with positive sentiment
+ private static final String[] POSITIVE_VERB_LIST = { "admire", "amaze",
+ "assist", "enjoy", "help", "guarantee", "impress",
+ "improve", "like", "love", "patronize", "prefer", "recommend",
+ "want" };
+
+ // verbs registered with negative sentiment; "disgust" belongs here, it used to
+ // be listed among the positive verbs
+ private static final String[] NEGATIVE_VERB_LIST = { "annoy", "appall",
+ "break", "complain", "confuse", "depress", "disappoint", "disgust",
+ "dishearten", "dislike", "dissatisfy", "embarrass", "fail", "fear",
+ "flaw", "frustrate", "hate", "ruin", "scare", "stink", "suck",
+ "think twice", "thwart", "upset", "vomit" };
+
+ public static final int SENTIMENT_POSITIVE = 1;
+ public static final int SENTIMENT_UNKNOWN = 0;
+ public static final int SENTIMENT_NEGATIVE = -1;
+
+ private static SentimentVocab instance = new SentimentVocab();
+
+ // complete sentiment word map, key = word, value = sentiment object
+ private Map<String, Sentiment> sentimentMap = new HashMap<String, Sentiment>();
+
+ // sentiment word sets, key = POS type, value = word set
+ private Map<String, HashSet<String>> wordSetMap = new HashMap<String, HashSet<String>>();
+
+ /** Value holder pairing the POS type and the sentiment constant of a word. */
+ public static class Sentiment {
+   /** POS type the word was registered under. */
+   public String posType;
+   /** Sentiment value the word was registered with. */
+   public int sentimentType;
+
+   Sentiment(String pos, int sentiment) {
+     posType = pos;
+     sentimentType = sentiment;
+   }
+ }
+
+ public static SentimentVocab getInstance() {
+ return instance;
+ }
+
+ /**
+  * Looks up the sentiment registered for a word, using the word exactly as given
+  * (no lemmatization is applied in this overload).
+  *
+  * @param word the word to look up, may be {@code null}
+  * @return the registered sentiment, or {@code null} for a {@code null} or
+  *         unknown word
+  */
+ public Sentiment getSentiment(String word) {
+   return word == null ? null : sentimentMap.get(word);
+ }
+
+ public Sentiment getSentiment(String word, String posType) {
+ if (word == null)
+ return null;
+
+ // get the normalized form of the word
+ word = WordDictionary.getInstance().getLemmaOrWord(word, posType);
+
+ return sentimentMap.get(word);
+ }
+
+ public boolean isSentimentWord(String word) {
+ return (getSentiment(word) != null);
+ }
+
+ /**
+  * Tells whether the (normalized) word is registered as a sentiment word under
+  * the given POS type.
+  *
+  * @param word the word to check, may be {@code null}
+  * @param posType the POS type the entry must have been registered with
+  * @return {@code true} only if an entry exists and its POS type equals
+  *         {@code posType}
+  */
+ public boolean isSentimentWord(String word, String posType) {
+   Sentiment sentiment = getSentiment(word, posType);
+   if (sentiment == null) {
+     return false;
+   }
+   // compare by value: '==' only matched when the caller passed the very same
+   // String instance as the one stored at registration
+   return sentiment.posType != null && sentiment.posType.equals(posType);
+ }
+
+ /**
+  * Returns the set of sentiment words registered under a POS type.
+  *
+  * @param posType the POS type, may be {@code null}
+  * @return the internal word set for that type, or {@code null} when the type is
+  *         {@code null} or has no words
+  */
+ public HashSet<String> getSentimentWordSet(String posType) {
+   return posType == null ? null : wordSetMap.get(posType);
+ }
+
+ /**
+  * Maps a sentiment constant to its display name.
+  *
+  * @param sentimentType one of the {@code SENTIMENT_*} constants
+  * @return "positive", "negative", or "unknown" for any other value
+  */
+ public static String getSentimentName(int sentimentType) {
+   if (sentimentType == SENTIMENT_POSITIVE) {
+     return "positive";
+   }
+   if (sentimentType == SENTIMENT_NEGATIVE) {
+     return "negative";
+   }
+   return "unknown";
+ }
+
+ /**
+  * Fills the sentiment map and the per-POS word sets from the static word lists.
+  * The order of the calls matters: the sentiment map is keyed by word only, so a
+  * word that occurs in more than one list (e.g. "down", "complain") keeps the
+  * entry written by the last call that contains it.
+  */
+ private SentimentVocab() {
+ // populate the sentiment map
+ // adjectives
+ addWordsToSentimentMap(POSITIVE_ADJECTTIVE_LIST,
+ POStags.TYPE_JJ, SENTIMENT_POSITIVE);
+ addWordsToSentimentMap(NEGATIVE_ADJECTTIVE_LIST,
+ POStags.TYPE_JJ, SENTIMENT_NEGATIVE);
+ // adverbs
+ addWordsToSentimentMap(POSITIVE_ADVERB_LIST, POStags.TYPE_RB,
+ SENTIMENT_POSITIVE);
+ addWordsToSentimentMap(NEGATIVE_ADVERB_LIST, POStags.TYPE_RB,
+ SENTIMENT_NEGATIVE);
+ // nouns
+ addWordsToSentimentMap(POSITIVE_NOUN_LIST, POStags.TYPE_NN,
+ SENTIMENT_POSITIVE);
+ addWordsToSentimentMap(NEGATIVE_NOUN_LIST, POStags.TYPE_NN,
+ SENTIMENT_NEGATIVE);
+ // verbs
+ addWordsToSentimentMap(POSITIVE_VERB_LIST, POStags.TYPE_VB,
+ SENTIMENT_POSITIVE);
+ addWordsToSentimentMap(NEGATIVE_VERB_LIST, POStags.TYPE_VB,
+ SENTIMENT_NEGATIVE);
+ }
+
+ /**
+  * Registers a list of words under one POS type and one sentiment value, both in
+  * the complete word map (overwriting an existing entry for the same word) and in
+  * the word set of the POS type.
+  *
+  * @param words the words to register
+  * @param posType the POS type shared by all the words
+  * @param sentimentType the sentiment constant shared by all the words
+  */
+ private void addWordsToSentimentMap(String[] words, String posType,
+     int sentimentType) {
+   // fetch the word set of this POS type, creating it on first use
+   HashSet<String> posWords = wordSetMap.get(posType);
+   if (posWords == null) {
+     posWords = new HashSet<String>();
+     wordSetMap.put(posType, posWords);
+   }
+
+   for (int idx = 0; idx < words.length; idx++) {
+     String entry = words[idx];
+     sentimentMap.put(entry, new Sentiment(posType, sentimentType));
+     posWords.add(entry);
+   }
+ }
+}
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,88 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+
+public class SynonymListFilter {
+ SynonymMap map=null;
+
+ /**
+  * Loads the synonym map from the file "wn_s.pl" in the given directory.
+  * The fragment "maps/analytics" is removed from the directory first. If the file
+  * cannot be read the stack trace is printed and {@code map} stays {@code null}.
+  *
+  * @param dir the directory holding wn_s.pl, with or without a trailing separator
+  */
+ public SynonymListFilter(String dir){
+   String base = dir.replace("maps/analytics","");
+   // insert the separator when missing; plain concatenation turned a directory
+   // without trailing slash into a path such as ".../resourceswn_s.pl"
+   if (base.length() > 0 && !base.endsWith("/") && !base.endsWith(File.separator)) {
+     base = base + "/";
+   }
+   try {
+     map = new SynonymMap( new FileInputStream(base+"wn_s.pl"));
+   } catch (IOException e) {
+     // FileNotFoundException is an IOException, so one handler covers both
+     e.printStackTrace();
+   }
+ }
+
+ protected static Map<String, List<String>> filteredKeyword_synonyms = new HashMap<String, List<String>>();
+
+ static public List<String> getFileLines(File aFile) {
+
+ List<String> items = new ArrayList<String>();
+
+ StringBuilder contents = new StringBuilder();
+ try {
+
+ BufferedReader input = new BufferedReader(new FileReader(aFile));
+ try {
+ String line = null; //not declared within while loop
+ while (( line = input.readLine()) != null){
+ int endOfWord = line.indexOf(';');
+ if (endOfWord>2)
+ line = line.substring(1, endOfWord -1 );
+
+ items.add(line);
+
+ }
+ }
+ finally {
+ input.close();
+ }
+ }
+ catch (IOException ex){
+ ex.printStackTrace();
+ }
+
+ return items;
+ }
+ /**
+  * Picks one synonym of the word at random and reports the choice on standard
+  * output.
+  *
+  * @param word the word to look up
+  * @return a randomly chosen synonym, or {@code null} when the synonym map could
+  *         not be loaded or holds no synonym for the word
+  */
+ public String getSynonym (String word){
+   // the constructor leaves map null when wn_s.pl could not be read
+   if (map == null)
+     return null;
+   String[] synonyms = map.getSynonyms(word);
+   if (synonyms==null || synonyms.length<1)
+     return null;
+   int index = (int) Math.floor(Math.random()*(double)synonyms.length);
+   System.out.println("Found synonyms "+Arrays.asList(synonyms).toString()+ " | selected synonym = "+synonyms[index] +" | for the input = "+ word);
+   return synonyms[index];
+ }
+ /** Demo entry point: looks up a random synonym for two sample words. */
+ public static void main(String[] args){
+   SynonymListFilter filter = new SynonymListFilter("/src/test/resources");
+   // the results are only reported through getSynonym's own console output
+   filter.getSynonym("bring");
+   filter.getSynonym("yell");
+ }
+}
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,379 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.io.IOException;
+ import java.io.InputStream;
+ import java.nio.ByteBuffer;
+ import java.nio.charset.Charset;
+ import java.util.ArrayList;
+ import java.util.Arrays;
+ import java.util.HashMap;
+ import java.util.Iterator;
+ import java.util.Map;
+ import java.util.TreeMap;
+ import java.util.TreeSet;
+
+ /**
+ * Loads the <a target="_blank"
+ * href="http://www.cogsci.princeton.edu/~wn/">WordNet </a> prolog file <a
+ * href="http://www.cogsci.princeton.edu/2.0/WNprolog-2.0.tar.gz">wn_s.pl </a>
+ * into a thread-safe main-memory hash map that can be used for fast
+ * high-frequency lookups of synonyms for any given (lowercase) word string.
+ * <p>
+ * There holds: If B is a synonym for A (A -> B) then A is also a synonym for B (B -> A).
+ * There does not necessarily hold: A -> B, B -> C then A -> C.
+ * <p>
+ * Loading typically takes some 1.5 secs, so should be done only once per
+ * (server) program execution, using a singleton pattern. Once loaded, a
+ * synonym lookup via {@link #getSynonyms(String)}takes constant time O(1).
+ * A loaded default synonym map consumes about 10 MB main memory.
+ * An instance is immutable, hence thread-safe.
+ * <p>
+ * This implementation borrows some ideas from the Lucene Syns2Index demo that
+ * Dave Spencer originally contributed to Lucene. Dave's approach
+ * involved a persistent Lucene index which is suitable for occasional
+ * lookups or very large synonym tables, but considered unsuitable for
+ * high-frequency lookups of medium size synonym tables.
+ * <p>
+ * Example Usage:
+ * <pre>
+ * String[] words = new String[] { "hard", "woods", "forest", "wolfish", "xxxx"};
+ * SynonymMap map = new SynonymMap(new FileInputStream("samples/fulltext/wn_s.pl"));
+ * for (int i = 0; i < words.length; i++) {
+ * String[] synonyms = map.getSynonyms(words[i]);
+ * System.out.println(words[i] + ":" + java.util.Arrays.asList(synonyms).toString());
+ * }
+ *
+ * Example output:
+ * hard:[arduous, backbreaking, difficult, fermented, firmly, grueling, gruelling, heavily, heavy, intemperately, knockout, laborious, punishing, severe, severely, strong, toilsome, tough]
+ * woods:[forest, wood]
+ * forest:[afforest, timber, timberland, wood, woodland, woods]
+ * wolfish:[edacious, esurient, rapacious, ravening, ravenous, voracious, wolflike]
+ * xxxx:[]
+ * </pre>
+ *
+ * @see <a target="_blank"
+ * href="http://www.cogsci.princeton.edu/~wn/man/prologdb.5WN.html">prologdb
+ * man page </a>
+ * @see <a target="_blank" href="http://www.hostmon.com/rfc/advanced.jsp">Dave's synonym demo site</a>
+ */
+ public class SynonymMap {
+
+ /** the index data; Map<String word, String[] synonyms> */
+ private final HashMap<String,String[]> table;
+
+ private static final String[] EMPTY = new String[0];
+
+ private static final boolean DEBUG = false;
+
+ /**
+  * Creates a synonym map from WordNet prolog data read from the given stream.
+  * The stream is fully consumed and closed. Word bytes are decoded as UTF-8.
+  *
+  * @param input
+  *          the stream to read from; {@code null} yields an empty synonym map
+  * @throws IOException
+  *           if an error occurred while reading the stream.
+  */
+ public SynonymMap(InputStream input) throws IOException {
+   if (input == null) {
+     this.table = new HashMap<String,String[]>(0);
+   } else {
+     this.table = read(toByteArray(input));
+   }
+ }
+
+ /**
+  * Returns the synonyms stored for the given word, sorted ascending.
+  *
+  * @param word
+  *          the word to look up (must be in lowercase).
+  * @return a fresh copy of the stored synonyms, so callers cannot modify the
+  *         index; the shared empty array when the word is unknown. Never
+  *         {@code null}.
+  */
+ public String[] getSynonyms(String word) {
+   String[] stored = table.get(word);
+   return stored == null ? EMPTY : stored.clone();
+ }
+
+ /**
+  * Renders the index for debugging: one "word:[synonyms]" line per word in
+  * sorted key order, followed by a summary with the number of keys, the total
+  * number of synonyms and how many words have exactly 0, 1, 2 and 3 synonyms.
+  *
+  * @return a String representation
+  */
+ @Override
+ public String toString() {
+   StringBuilder out = new StringBuilder();
+   int total = 0;
+   int[] bySize = new int[4]; // bySize[k] = number of words with exactly k synonyms
+
+   for (String word : new TreeMap<String,String[]>(table).keySet()) {
+     String[] synonyms = getSynonyms(word);
+     out.append(word).append(":").append(Arrays.asList(synonyms)).append("\n");
+     total += synonyms.length;
+     if (synonyms.length < bySize.length) {
+       bySize[synonyms.length]++;
+     }
+   }
+
+   out.append("\n\nkeys=").append(table.size())
+      .append(", synonyms=").append(total)
+      .append(", f0=").append(bySize[0])
+      .append(", f1=").append(bySize[1])
+      .append(", f2=").append(bySize[2])
+      .append(", f3=").append(bySize[3]);
+   return out.toString();
+ }
+
+ /**
+  * Analyzes/transforms the given word on input stream loading. This default
+  * implementation simply lowercases the word, independently of the JVM's default
+  * locale. Override this method with a custom stemming algorithm or similar, if
+  * desired.
+  *
+  * @param word
+  *          the word to analyze
+  * @return the same word, or a different word (or null to indicate that the
+  *         word should be ignored)
+  */
+ protected String analyze(String word) {
+   // fixed locale: the no-argument overload uses the default locale, which under
+   // e.g. a Turkish default maps 'I' to a dotless i and breaks later lookups
+   return word.toLowerCase(java.util.Locale.ENGLISH);
+ }
+
+ /**
+  * Tells whether the string consists of letters only.
+  *
+  * @param str the string to check
+  * @return {@code true} if every character satisfies {@code Character.isLetter}
+  *         (also for the empty string)
+  */
+ private static boolean isValid(String str) {
+   int n = str.length();
+   for (int pos = 0; pos < n; pos++) {
+     if (!Character.isLetter(str.charAt(pos))) {
+       return false;
+     }
+   }
+   return true;
+ }
+
+ /**
+  * Parses the raw bytes of the prolog file into the word-to-synonyms index.
+  * Each record is scanned as: '(' , a decimal group number up to ',' , then a
+  * single-quoted word. Words that pass {@link #isValid(String)} and
+  * {@link #analyze(String)} are recorded both per word and per group; the two
+  * tables are then turned into the index and compacted.
+  *
+  * @param data the complete file content
+  * @return the map from word to its sorted synonyms
+  */
+ private HashMap<String,String[]> read(byte[] data) {
+ int WORDS = (int) (76401 / 0.7); // presizing
+ int GROUPS = (int) (88022 / 0.7); // presizing
+ HashMap<String,ArrayList<Integer>> word2Groups = new HashMap<String,ArrayList<Integer>>(WORDS); // Map<String word, int[] groups>
+ HashMap<Integer,ArrayList<String>> group2Words = new HashMap<Integer,ArrayList<String>>(GROUPS); // Map<int group, String[] words>
+ HashMap<String,String> internedWords = new HashMap<String,String>(WORDS);// Map<String word, String word>
+
+ Charset charset = Charset.forName("UTF-8");
+ // lastNum/lastGroup cache the boxed Integer of the previous record's group number
+ int lastNum = -1;
+ Integer lastGroup = null;
+ int len = data.length;
+ int i=0;
+
+ while (i < len) { // until EOF
+ /* Part A: Parse a line */
+
+ // scan to beginning of group
+ while (i < len && data[i] != '(') i++;
+ if (i >= len) break; // EOF
+ i++;
+
+ // parse group
+ // every byte up to the next ',' is taken as an ASCII digit (48 is '0')
+ int num = 0;
+ while (i < len && data[i] != ',') {
+ num = 10*num + (data[i] - 48);
+ i++;
+ }
+ i++;
+ // if (DEBUG) System.err.println("num="+ num);
+
+ // scan to beginning of word
+ while (i < len && data[i] != '\'') i++;
+ i++;
+
+ // scan to end of word
+ // keeps scanning past any quote that is not immediately followed by ','
+ int start = i;
+ do {
+ while (i < len && data[i] != '\'') i++;
+ i++;
+ } while (i < len && data[i] != ','); // word must end with "',"
+
+ if (i >= len) break; // EOF
+ // i is on the ',' after the closing quote, so the word spans [start, i-1)
+ String word = charset.decode(ByteBuffer.wrap(data, start, i-start-1)).toString();
+ // String word = new String(data, 0, start, i-start-1); // ASCII
+
+ /*
+ * Part B: ignore phrases (with spaces and hyphens) and
+ * non-alphabetic words, and let user customize word (e.g. do some
+ * stemming)
+ */
+ if (!isValid(word)) continue; // ignore
+ word = analyze(word);
+ if (word == null || word.length() == 0) continue; // ignore
+
+
+ /* Part C: Add (group,word) to tables */
+
+ // ensure compact string representation, minimizing memory overhead
+ // equal words are mapped to one shared String instance; createIndex relies
+ // on this when it compares words by reference
+ String w = internedWords.get(word);
+ if (w == null) {
+ word = new String(word); // ensure compact string
+ internedWords.put(word, word);
+ } else {
+ word = w;
+ }
+
+ // reuse the boxed group number while consecutive records share it
+ Integer group = lastGroup;
+ if (num != lastNum) {
+ group = Integer.valueOf(num);
+ lastGroup = group;
+ lastNum = num;
+ }
+
+ // add word --> group
+ ArrayList<Integer> groups = word2Groups.get(word);
+ if (groups == null) {
+ groups = new ArrayList<Integer>(1);
+ word2Groups.put(word, groups);
+ }
+ groups.add(group);
+
+ // add group --> word
+ ArrayList<String> words = group2Words.get(group);
+ if (words == null) {
+ words = new ArrayList<String>(1);
+ group2Words.put(group, words);
+ }
+ words.add(word);
+ }
+
+
+ /* Part D: compute index data structure */
+ HashMap<String,String[]> word2Syns = createIndex(word2Groups, group2Words);
+
+ /* Part E: minimize memory consumption by a factor 3 (or so) */
+ // if (true) return word2Syns;
+ word2Groups = null; // help gc
+ //TODO: word2Groups.clear(); would be more appropriate ?
+ group2Words = null; // help gc
+ //TODO: group2Words.clear(); would be more appropriate ?
+
+ return optimize(word2Syns, internedWords);
+ }
+
+ /**
+  * Builds the word-to-synonyms index: for every word, the sorted union of the
+  * words of all groups it belongs to, minus the word itself. Words without any
+  * synonym get no entry.
+  *
+  * @param word2Groups the group ids of each word
+  * @param group2Words the words of each group
+  * @return the map from word to its sorted synonym array
+  */
+ private HashMap<String,String[]> createIndex(Map<String,ArrayList<Integer>> word2Groups, Map<Integer,ArrayList<String>> group2Words) {
+   HashMap<String,String[]> index = new HashMap<String,String[]>();
+
+   for (Map.Entry<String,ArrayList<Integer>> wordEntry : word2Groups.entrySet()) {
+     String word = wordEntry.getKey();
+     TreeSet<String> sorted = new TreeSet<String>();
+
+     for (Integer groupId : wordEntry.getValue()) {
+       for (String candidate : group2Words.get(groupId)) {
+         // reference comparison is intended: both strings are interned instances;
+         // a word is implicitly its own synonym and is left out
+         if (candidate != word) {
+           sorted.add(candidate);
+         }
+       }
+     }
+
+     if (!sorted.isEmpty()) {
+       index.put(word, sorted.toArray(new String[sorted.size()]));
+     }
+   }
+
+   return index;
+ }
+
+ /**
+  * Rewrites the index so that every key and every synonym is a substring of one
+  * large string that concatenates all keys. The same map instance is returned,
+  * with its keys and array elements replaced.
+  * NOTE(review): the memory benefit presumably depends on substring() sharing
+  * the parent's character data, which is JDK-version specific - confirm.
+  *
+  * @param word2Syns the index to rewrite in place
+  * @param internedWords the word table; its values are replaced by the substrings
+  * @return {@code word2Syns}
+  */
+ private HashMap<String,String[]> optimize(HashMap<String,String[]> word2Syns, HashMap<String,String> internedWords) {
+ if (DEBUG) {
+ System.err.println("before gc");
+ for (int i=0; i < 10; i++) System.gc();
+ System.err.println("after gc");
+ }
+
+ // collect entries
+ // snapshot keys and values into arrays, because the map is modified further down
+ int len = 0;
+ int size = word2Syns.size();
+ String[][] allSynonyms = new String[size][];
+ String[] words = new String[size];
+ Iterator<Map.Entry<String,String[]>> iter = word2Syns.entrySet().iterator();
+ for (int j=0; j < size; j++) {
+ Map.Entry<String,String[]> entry = iter.next();
+ allSynonyms[j] = entry.getValue();
+ words[j] = entry.getKey();
+ len += words[j].length();
+ }
+
+ // assemble large string containing all words
+ StringBuilder buf = new StringBuilder(len);
+ for (int j=0; j < size; j++) buf.append(words[j]);
+ String allWords = new String(buf.toString()); // ensure compact string across JDK versions
+ buf = null;
+
+ // intern words at app level via memory-overlaid substrings
+ // p is the running offset of the current word inside allWords
+ for (int p=0, j=0; j < size; j++) {
+ String word = words[j];
+ internedWords.put(word, allWords.substring(p, p + word.length()));
+ p += word.length();
+ }
+
+ // replace words with interned words
+ for (int j=0; j < size; j++) {
+ String[] syns = allSynonyms[j];
+ for (int k=syns.length; --k >= 0; ) {
+ syns[k] = internedWords.get(syns[k]);
+ }
+ // re-insert the entry under the substring key (equal content, new instance)
+ word2Syns.remove(words[j]);
+ word2Syns.put(internedWords.get(words[j]), syns);
+ }
+
+ if (DEBUG) {
+ words = null;
+ allSynonyms = null;
+ internedWords = null;
+ allWords = null;
+ System.err.println("before gc");
+ for (int i=0; i < 10; i++) System.gc();
+ System.err.println("after gc");
+ }
+ return word2Syns;
+ }
+
+ // the following utility methods below are copied from Apache style Nux library - see http://dsd.lbl.gov/nux
+ /**
+  * Reads the stream to its end and returns the bytes read. The stream is closed
+  * in all cases, also when reading fails.
+  *
+  * @param input the stream to drain
+  * @return an array holding exactly the bytes that were read
+  * @throws IOException if reading or closing the stream fails
+  */
+ private static byte[] toByteArray(InputStream input) throws IOException {
+ try {
+ // safe and fast even if input.available() behaves weird or buggy
+ int len = Math.max(256, input.available());
+ byte[] buffer = new byte[len];
+ byte[] output = new byte[len];
+
+ // from here on len counts the bytes accumulated in output
+ len = 0;
+ int n;
+ while ((n = input.read(buffer)) >= 0) {
+ if (len + n > output.length) { // grow capacity
+ // at least double the capacity, then copy the old content plus the new chunk
+ byte tmp[] = new byte[Math.max(output.length << 1, len + n)];
+ System.arraycopy(output, 0, tmp, 0, len);
+ System.arraycopy(buffer, 0, tmp, len, n);
+ buffer = output; // use larger buffer for future larger bulk reads
+ output = tmp;
+ } else {
+ System.arraycopy(buffer, 0, output, len, n);
+ }
+ len += n;
+ }
+
+ // trim to the exact size unless output is already completely filled
+ if (len == output.length) return output;
+ buffer = null; // help gc
+ buffer = new byte[len];
+ System.arraycopy(output, 0, buffer, 0, len);
+ return buffer;
+ } finally {
+ if (input != null) input.close();
+ }
+ }
+
+}
\ No newline at end of file
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import opennlp.tools.coref.mention.Dictionary;
+
+import net.didion.jwnl.JWNL;
+import net.didion.jwnl.JWNLException;
+import net.didion.jwnl.data.Adjective;
+import net.didion.jwnl.data.IndexWord;
+import net.didion.jwnl.data.POS;
+import net.didion.jwnl.data.Pointer;
+import net.didion.jwnl.data.PointerType;
+import net.didion.jwnl.data.Synset;
+import net.didion.jwnl.data.VerbFrame;
+import net.didion.jwnl.dictionary.MapBackedDictionary;
+import net.didion.jwnl.dictionary.MorphologicalProcessor;
+import net.didion.jwnl.dictionary.morph.DefaultMorphologicalProcessor;
+import net.didion.jwnl.dictionary.morph.DetachSuffixesOperation;
+import net.didion.jwnl.dictionary.morph.LookupExceptionsOperation;
+import net.didion.jwnl.dictionary.morph.LookupIndexWordOperation;
+import net.didion.jwnl.dictionary.morph.Operation;
+import net.didion.jwnl.dictionary.morph.TokenizerOperation;
+import net.didion.jwnl.princeton.file.PrincetonObjectDictionaryFile;
+
+/**
+ * An implementation of the Dictionary interface using the JWNL library.
+ */
+public class TopJWNLDictionary implements Dictionary {
+
+ private net.didion.jwnl.dictionary.Dictionary dict;
+ private MorphologicalProcessor morphy;
+ private static String[] empty = new String[0];
+
+ public TopJWNLDictionary(String propertiesFile) throws IOException,
+ JWNLException {
+ JWNL.initialize(this.getClass().getResourceAsStream(propertiesFile));
+ dict = net.didion.jwnl.dictionary.Dictionary.getInstance();
+ morphy = dict.getMorphologicalProcessor();
+ }
+
+ @SuppressWarnings("unchecked")
+ public String[] getLemmas(String word, String tag) {
+ try {
+ POS pos;
+ if (tag.startsWith("N") || tag.startsWith("n")) {
+ pos = POS.NOUN;
+ } else if (tag.startsWith("N") || tag.startsWith("v")) {
+ pos = POS.VERB;
+ } else if (tag.startsWith("J") || tag.startsWith("a")) {
+ pos = POS.ADJECTIVE;
+ } else if (tag.startsWith("R") || tag.startsWith("r")) {
+ pos = POS.ADVERB;
+ } else {
+ pos = POS.NOUN;
+ }
+ List<String> lemmas = morphy.lookupAllBaseForms(pos, word);
+ return lemmas.toArray(new String[lemmas.size()]);
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ return null;
+ }
+ }
+
+ public String getSenseKey(String lemma, String pos, int sense) {
+ try {
+ IndexWord iw = dict.getIndexWord(POS.NOUN, lemma);
+ if (iw == null) {
+ return null;
+ }
+ return String.valueOf(iw.getSynsetOffsets()[sense]);
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ return null;
+ }
+
+ }
+
+ public int getNumSenses(String lemma, String pos) {
+ try {
+ IndexWord iw = dict.getIndexWord(POS.NOUN, lemma);
+ if (iw == null) {
+ return 0;
+ }
+ return iw.getSenseCount();
+ } catch (JWNLException e) {
+ return 0;
+ }
+ }
+
+ private void getParents(Synset synset, List<String> parents)
+ throws JWNLException {
+ Pointer[] pointers = synset.getPointers();
+ for (int pi = 0, pn = pointers.length; pi < pn; pi++) {
+ if (pointers[pi].getType() == PointerType.HYPERNYM) {
+ Synset parent = pointers[pi].getTargetSynset();
+ parents.add(String.valueOf(parent.getOffset()));
+ getParents(parent, parents);
+ }
+ }
+ }
+
+ public String[] getParentSenseKeys(String lemma, String pos, int sense) {
+ // System.err.println("JWNLDictionary.getParentSenseKeys: lemma="+lemma);
+ try {
+ IndexWord iw = dict.getIndexWord(POS.NOUN, lemma);
+ if (iw != null) {
+ Synset synset = iw.getSense(sense + 1);
+ List<String> parents = new ArrayList<String>();
+ getParents(synset, parents);
+ return parents.toArray(new String[parents.size()]);
+ } else {
+ return empty;
+ }
+ } catch (JWNLException e) {
+ e.printStackTrace();
+ return null;
+ }
+ }
+
+ public static void main(String[] args) throws IOException, JWNLException {
+ String searchDir = System.getProperty("WNSEARCHDIR");
+ System.err.println("searchDir=" + searchDir);
+ searchDir = "models/WordNet_2.1";
+ if (searchDir != null) {
+ Dictionary dict = new TopJWNLDictionary(
+ System.getProperty("WNSEARCHDIR"));
+ // Dictionary dict = new TopJWNLDictionary();
+ // String word = args[0];
+ String[] lemmas = dict.getLemmas("test", "NN");
+ for (int li = 0, ln = lemmas.length; li < ln; li++) {
+ for (int si = 0, sn = dict.getNumSenses(lemmas[li], "NN"); si < sn; si++) {
+ System.out.println(lemmas[li]
+ + " ("
+ + si
+ + ")\t"
+ + java.util.Arrays.asList(dict.getParentSenseKeys(
+ lemmas[li], "NN", si)));
+ }
+ }
+ }
+ }
+}
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,137 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import opennlp.tools.coref.mention.Dictionary;
+
+ /**
+  * Singleton that maps words to lemmas, consulting a small table of special
+  * cases first and a WordNet-backed {@link Dictionary} second.
+  */
+ public class WordDictionary {
+   /** Each row lists a lemma followed by the word forms that map to it. */
+   private static final String[][] SPECIAL_CASES = { { "lens", "lenses" } };
+
+   private static WordDictionary instance;
+
+   /** WordNet dictionary; null when loading failed in the constructor. */
+   private final Dictionary dictionary;
+   private final Map<String, String> specialCaseMap;
+
+   /** Returns the shared instance, creating it on first use. */
+   public synchronized static WordDictionary getInstance() {
+     if (instance == null)
+       instance = new WordDictionary();
+
+     return instance;
+   }
+
+   private WordDictionary() {
+     // initialize the dictionary by loading the WordNet database
+     Dictionary loaded = null;
+     try {
+       // NOTE(review): "PROPERTIES_FILE" is a literal string, not a constant;
+       // it looks like a placeholder for a configured properties path - confirm
+       loaded = new TopJWNLDictionary("PROPERTIES_FILE");
+     } catch (Exception e) {
+       e.printStackTrace();
+       System.err.println("Failed to load the WordNet database: " + e);
+     }
+     dictionary = loaded;
+
+     // build the dictionary for special cases
+     specialCaseMap = buildSpecialCaseMap();
+   }
+
+   /**
+    * Returns the lemma of the word for the given POS type, or the trimmed,
+    * lower-cased word itself when no lemma is found.
+    *
+    * @param word the word to look up; may be null
+    * @param type part-of-speech tag passed on to {@link #getLemma(String, String)}
+    * @return the lemma or normalized word, or null if word is null
+    */
+   public String getLemmaOrWord(String word, String type) {
+     String lemma = getLemma(word, type);
+     if (lemma != null)
+       return lemma;
+     else
+       return (word == null) ? null : normalize(word);
+   }
+
+   /**
+    * Returns the first lemma found for the word.
+    *
+    * @param word the word to look up; may be null
+    * @param type part-of-speech tag; it is lower-cased before the lookup
+    * @return the lemma; the unchanged word if it has 20 or more characters;
+    *         null if the word is null or blank, no lemma is found, or the
+    *         WordNet dictionary could not be loaded
+    */
+   public String getLemma(String word, String type) {
+     if (word == null)
+       return null;
+     // skip some long word,avoid dictionary getLemmas dead
+     if (word.length() >= 20)
+       return word;
+     word = normalize(word);
+     if (word.length() == 0)
+       return null;
+
+     // check special cases first
+     String lemma = specialCaseMap.get(word);
+     if (lemma != null)
+       return lemma;
+
+     // the WordNet database failed to load: behave as "no lemma found"
+     // instead of throwing a NullPointerException
+     if (dictionary == null)
+       return null;
+
+     // use the dictionary for general cases
+     // JWNLDictionary has a bug, and we have to use lower case type
+     type = (type == null) ? null : type.toLowerCase(java.util.Locale.ROOT);
+     String[] lemmas = dictionary.getLemmas(word, type);
+     if (lemmas == null || lemmas.length == 0)
+       return null;
+
+     return lemmas[0];
+   }
+
+   /**
+    * Returns the lemma for a word of unknown POS type, trying noun first and
+    * then verb, or the trimmed, lower-cased word if no lemma is found.
+    *
+    * @param word the word to look up; may be null
+    * @return the lemma or normalized word, or null if word is null
+    */
+   public String getLemmaOrWord(String word) {
+     if (word == null)
+       return null;
+
+     // try noun first
+     String lemma = getLemma(word, "NN");
+     if (lemma != null)
+       return lemma;
+
+     // then try verb
+     lemma = getLemma(word, "VB");
+     if (lemma != null)
+       return lemma;
+
+     // return word now
+     return normalize(word);
+   }
+
+   /**
+    * Trims and lower-cases a word. Locale.ROOT keeps the result independent of
+    * the JVM default locale (e.g. Turkish dotless i).
+    */
+   private static String normalize(String word) {
+     return word.trim().toLowerCase(java.util.Locale.ROOT);
+   }
+
+   /** Maps every entry of each SPECIAL_CASES row to the row's first entry. */
+   private static Map<String, String> buildSpecialCaseMap() {
+     Map<String, String> map = new HashMap<String, String>();
+     for (String[] wordList : SPECIAL_CASES) {
+       String lemma = wordList[0];
+       for (String word : wordList) {
+         map.put(word, lemma);
+       }
+     }
+     return map;
+   }
+
+   /** Demo: prints the lemma found for a few sample words. */
+   public static void main(String[] args) {
+     String[] verbs = { "is", "has", "were", "likes", "TaKen", "going" };
+     String[] nouns = { "efficient", "Cars", "lens", "wives", "lenses",
+         "photos" };
+     String[] adverbs = { "would", "could", "should", "might" };
+     WordDictionary dictionary = WordDictionary.getInstance();
+
+     for (String word : verbs) {
+       System.out
+           .println(word + " ==> " + dictionary.getLemma(word, "VB"));
+     }
+     for (String word : nouns) {
+       System.out
+           .println(word + " ==> " + dictionary.getLemma(word, "NN"));
+     }
+     for (String word : adverbs) {
+       System.out
+           .println(word + " ==> " + dictionary.getLemma(word, "JJ"));
+     }
+   }
+ }
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,68 @@
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.similarity.apps.BingQueryRunner;
+import opennlp.tools.similarity.apps.HitBase;
+
+import org.apache.commons.lang.StringUtils;
+
+ /**
+  * Looks up products through {@link BingQueryRunner} and filters the hits.
+  */
+ public class BingAPIProductSearchManager {
+   BingQueryRunner search = new BingQueryRunner();
+
+   /**
+    * Searches amazon.com for reviews of the named product.
+    *
+    * @param name product name; it is embedded in the search query
+    * @param count maximum number of hits to examine; hits rejected by the URL
+    *          filter still count towards this limit
+    * @return the examined hits whose URL contains "amazon.com"
+    */
+   public List<HitBase> findProductByName(String name, int count){
+     List<HitBase> found = search.runSearch("site:amazon.com"+" "+name + " reviews"
+         , 10);
+     List<HitBase> results = new ArrayList<HitBase>();
+     if (found == null)
+       return results;
+     int ct=0;
+     for(HitBase h: found){
+       if (ct>=count) break;
+       ct++;
+       // the title is not used by this filter, so it is no longer read here;
+       // the original dereferenced it and could fail on a hit without a title
+       String url = h.getUrl();
+       if (url == null || url.indexOf("amazon.com")<0)
+         continue;
+       results.add(h);
+     }
+     return results;
+   }
+
+   /**
+    * Searches for the product name as given and keeps the hits whose title
+    * covers enough of the words in the name.
+    *
+    * @param name product name; split on single spaces into words
+    * @param count number of hits requested and maximum number examined
+    * @return the examined hits in which more than 40% of the words of the name
+    *         occur in the lower-cased title next to a space
+    */
+   public List<HitBase> findProductByNameNoReview(String name, int count){
+     List<HitBase> found = search.runSearch(name, count);
+     List<HitBase> results = new ArrayList<HitBase>();
+     if (found == null)
+       return results;
+     // loop-invariant, so split once instead of once per hit
+     String[] merchantWords = name.toLowerCase().split(" ");
+     int ct=0;
+     for(HitBase h: found){
+       if (ct>=count) break;
+       ct++;
+       if (h.getTitle() == null)
+         continue; // coverage cannot be computed without a title
+       String title = h.getTitle().toLowerCase();
+       int overlapCount=0;
+       for(String commonWord:merchantWords){
+         if (title.indexOf(commonWord+" ")>-1 || title.indexOf(" "+commonWord)>-1){
+           overlapCount++;
+           System.out.println(" found word "+ commonWord + " in title = "+title);
+         }
+       }
+       float coverage = (float)overlapCount/(float) (merchantWords.length);
+       // the original also OR-ed in (coverage>0.5f && fewer than 4 words), which
+       // can never be true unless coverage>0.4 already is, so it was dropped
+       if (coverage>0.4)
+         results.add(h);
+     }
+     return results;
+   }
+
+   /** Demo: searches for "chain saw" and prints the hits. */
+   public static void main(String[] args){
+     BingAPIProductSearchManager man = new BingAPIProductSearchManager ();
+     List<HitBase> res = man.findProductByName("chain saw", 5);
+     System.out.println(res);
+   }
+ }
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,143 @@
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.List;
+import org.apache.commons.lang.StringUtils;
+
+import opennlp.tools.jsmlearning.ProfileReaderWriter;
+import opennlp.tools.similarity.apps.utils.PageFetcher;
+import com.restfb.Connection;
+import com.restfb.DefaultFacebookClient;
+import com.restfb.FacebookClient;
+import com.restfb.Parameter;
+import com.restfb.exception.FacebookException;
+import com.restfb.types.Event;
+import com.restfb.types.Page;
+
+
+public class FBOpenGraphSearchManager {
+
+ public List<String[]> profiles = null;
+ protected FacebookClient mFBClient;
+ protected PageFetcher pageFetcher = new PageFetcher();
+ protected static final int NUM_TRIES = 5;
+ protected static final long WAIT_BTW_TRIES=1000; //milliseconds between re-tries
+
+
+ public FBOpenGraphSearchManager(){
+ profiles = ProfileReaderWriter.readProfiles("C:\\nc\\features\\analytics\\dealanalyzer\\sweetjack-localcoupon-may12012tooct302012.csv");
+
+ }
+
+
+ public void setFacebookClient(FacebookClient c){
+ this.mFBClient=c;
+ }
+
+ public List<Event> getFBEventsByName(String event)
+ {
+ List<Event> events = new ArrayList<Event>();
+
+ for(int i=0; i < NUM_TRIES; i++)
+ {
+ try
+ {
+ Connection<Event> publicSearch =
+ mFBClient.fetchConnection("search", Event.class,
+ Parameter.with("q", event), Parameter.with("type", "event"),Parameter.with("limit", 100));
+ System.out.println("Searching FB events for " + event);
+ events= publicSearch.getData();
+ break;
+ }
+ catch(FacebookException e)
+ {
+ System.out.println("FBError "+e);
+ try
+ {
+ Thread.sleep(WAIT_BTW_TRIES);
+ }
+ catch (InterruptedException e1)
+ {
+ // TODO Auto-generated catch block
+ System.out.println("Error "+e1);
+ }
+ }
+ }
+ return events;
+ }
+
+ public Long getFBPageLikes(String merchant)
+ {
+ List<Page> groups = new ArrayList<Page>();
+
+ for(int i=0; i < NUM_TRIES; i++)
+ {
+ try
+ {
+ Connection<Page> publicSearch =
+ mFBClient.fetchConnection("search", Page.class,
+ Parameter.with("q", merchant), Parameter.with("type", "page"),Parameter.with("limit", 100));
+ System.out.println("Searching FB Pages for " + merchant);
+ groups= publicSearch.getData();
+ break;
+ }
+ catch(FacebookException e)
+ {
+ System.out.println("FBError "+e);
+ try
+ {
+ Thread.sleep(WAIT_BTW_TRIES);
+ }
+ catch (InterruptedException e1)
+ {
+ // TODO Auto-generated catch block
+ System.out.println("Error "+e1);
+ }
+ }
+ }
+
+ for (Page p: groups){
+ if (p!=null && p.getLikes()!=null && p.getLikes()>0)
+ return p.getLikes();
+ }
+
+ //stats fwb">235</span>
+
+ for (Page p: groups){
+ if (p.getId()==null)
+ continue;
+ String content = pageFetcher.fetchOrigHTML("http://www.facebook.com/"+p.getId());
+
+ String likes = StringUtils.substringBetween(content, "stats fwb\">", "<" );
+ if (likes==null)
+ continue;
+ Integer nLikes =0;
+ try {
+ nLikes = Integer.parseInt(likes);
+ } catch (Exception e){
+
+ }
+ if (nLikes>0){
+ return (long)nLikes;
+ }
+
+ }
+
+
+ return null;
+ }
+
+
+ //
+
+ public static void main(String[] args){
+ FBOpenGraphSearchManager man = new FBOpenGraphSearchManager ();
+ man.setFacebookClient(new DefaultFacebookClient());
+
+
+ long res = man.getFBPageLikes("chain saw");
+ System.out.println(res);
+
+ }
+}
Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java Mon Jan 6 17:48:30 2014
@@ -0,0 +1,86 @@
+package opennlp.tools.apps.review_builder;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLDecoder;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+public class MachineTranslationWrapper {
+ private String translatorURL = "http://mymemory.translated.net/api/get?q=";
+
+ public String translate(String sentence, String lang2lang){
+ if (sentence==null)
+ return null;
+ String request = translatorURL + sentence.replace(' ','+') + "&langpair="+lang2lang;//"en|es";
+ JSONArray arr=null, prodArr = null, searchURLviewArr = null;
+ try {
+ URL urlC = new URL(request);
+ URLConnection connection = urlC.openConnection();
+
+ String line;
+ String result = "";
+ BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
+ int count = 0;
+ while ((line = reader.readLine()) != null)
+ {
+ result+=line;
+ count++;
+ }
+ JSONObject rootObject = new JSONObject(result);
+ JSONObject findObject = rootObject.getJSONObject("responseData");
+ String transl = findObject.getString("translatedText");
+ try {
+ transl = URLDecoder.decode(transl, "UTF-8");
+ } catch (Exception e) {
+
+ }
+
+ return transl;
+
+ } catch (MalformedURLException e) {
+
+ e.printStackTrace();
+ return null;
+ } catch (JSONException e) {
+ e.printStackTrace();
+ return null;
+ } catch (IOException e) {
+ e.printStackTrace();
+ return null;
+ }
+
+ }
+
+ public String rePhrase(String sentence){
+ System.out.println("orig = "+ sentence);
+ String transl = translate(sentence, "en|es");
+ System.out.println("tranls = "+transl);
+ String inverseTransl = translate(transl, "es|en");
+ if (!(inverseTransl.indexOf("NO QUERY SPECIFIED")>-1) && !(inverseTransl.indexOf("INVALID LANGUAGE")>-1) && !(inverseTransl.indexOf("MYMEMORY WARNING")>-1))
+ return inverseTransl;
+ else
+ return sentence;
+ }
+
+
+
+ public static void main(String[] args){
+ MachineTranslationWrapper rePhraser = new MachineTranslationWrapper();
+
+ System.out.println(rePhraser.translate("I went to the nearest bookstore to buy a book written by my friend and his aunt", "en|ru"));
+
+ System.out.println(rePhraser.rePhrase("I went to the nearest bookstore to buy a book written by my friend and his aunt"));
+
+ }
+
+}