You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by bg...@apache.org on 2014/01/06 18:48:32 UTC

svn commit: r1555944 [1/11] - in /opennlp/sandbox/opennlp-similarity/src: main/java/opennlp/tools/apps/ main/java/opennlp/tools/apps/contentgen/ main/java/opennlp/tools/apps/contentgen/multithreaded/ main/java/opennlp/tools/apps/relevanceVocabs/ main/j...

Author: bgalitsky
Date: Mon Jan  6 17:48:30 2014
New Revision: 1555944

URL: http://svn.apache.org/r1555944
Log:
OPENNLP-628

Added:
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MinedSentenceProcessor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ParserConstants.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewBuilderRunner.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/ReviewObj.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceBeingOriginalized.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/SentenceOriginalizer.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/URLsWithReviewFinderByProductName.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/WebPageReviewExtractor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/EmailSender.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/utils/email/SMTP_Authenticator.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/FeatureSpaceCoverageProcessor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/IntersectionSetBuilder.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/ProfileReaderWriter.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/TreeKernelRunner.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ArcType.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/IGeneralizer.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/PTTree.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/Pair.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseCorefsBuilder.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseThicket.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/ParseTreeNode.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/Triple.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/WordWordInterSentenceRelationArc.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/BingQueryRunnerMultipageSearchResults.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MinedSentenceProcessor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MostFrequentWordsFromPageGetter.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/SnippetToParagraph.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageContentSentenceExtractor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/apps/WebPageExtractor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/communicative_actions/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilder.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsAttribute.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceExtendedForestSearchResultsProcessorSetFormer.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedExtendedForestSearchResultsProcessor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/MultiSentenceKernelBasedSearchResultsProcessor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/PT2ExtendedTreeForestBuilder.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/SnippetToParagraphFull.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeExtenderByAnotherLinkedTree.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/kernel_interface/TreeKernelRunner.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/GeneralizationListReducer.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/LemmaFormManager.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/Matcher.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilder.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreeChunkListScorer.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePath.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathComparable.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathMatcher.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/matching/ParseTreePathMatcherDeterministic.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/EdgeProductBuilder.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/GraphFromPTreeBuilder.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseGraphNode.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/parse_thicket2graph/ParseTreeVisualizer.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhraseConcept.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructure.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureArcsBuilder.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarker.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGenerator.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorRunner.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/ContentGeneratorSupport.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinderML.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/YahooAnswersMiner.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/ContentGeneratorRequestHandler.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeSearchRequestHandler.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/NLProgram2CodeRequestHandler.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SearchResultsReRankerRequestHandler.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/cgRequestForm.html
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/solrconfig.xml
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/MultiSentenceSearchResultsProcessorTest.java
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/RelatedSentenceFinderTest.java
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/SnippetToParagraphTest.java
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/apps/StoryDiscourseNavigatorTest.java
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/communicative_actions/CommunicativeActionsArcBuilderTest.java
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/kernel_interface/
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PT2ThicketPhraseBuilderTest.java
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTMatcherTest.java
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PTPhraseBuilderTest.java
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/matching/PairwiseMatcherTest.java
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/parse_thicket2graph/
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/pattern_structure/PhrasePatternStructureTest.java
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/rhetoric_structure/
    opennlp/sandbox/opennlp-similarity/src/test/java/opennlp/tools/parse_thicket/rhetoric_structure/RhetoricStructureMarkerTest.java
    opennlp/sandbox/opennlp-similarity/src/test/resources/tree_kernel/
    opennlp/sandbox/opennlp-similarity/src/test/resources/tree_kernel/tree_kernel.zip   (with props)
Modified:
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2Obj.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/nl2code/NL2ObjCreateAssign.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/BingQueryRunner.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/GeneratedSentenceProcessor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBase.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/HitBaseComparable.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/RelatedSentenceFinder.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SearchResultsProcessor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/SpeechRecognitionResultsProcessor.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/StoryDiscourseNavigator.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/IterativeQueryComponent.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/solr/SyntGenRequestHandler.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/taxo_builder/TaxonomyExtenderViaMebMining.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/similarity/apps/utils/PageFetcher.java
    opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/textsimilarity/chunker2matcher/ParserChunker2MatcherProcessor.java

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/BingWebQueryRunnerThread.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,54 @@
+package opennlp.tools.apps.contentgen.multithreaded;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.similarity.apps.BingQueryRunner;
+import opennlp.tools.similarity.apps.HitBase;
+
+public class BingWebQueryRunnerThread extends BingQueryRunner implements Runnable{
+	
+	private String query;
+	private List<HitBase> results= new ArrayList<HitBase>();
+	public BingWebQueryRunnerThread(String Query){
+		super();
+		this.query=Query;
+	}
+	public void run(){
+		results=runSearch(query);
+		fireMyEvent(new MyEvent(this));
+	}
+	public List<HitBase> getResults() {
+		return results;
+	}
+	
+	public String getQuery() {
+		return query;
+	}
+	
+	// Create the listener list
+    protected javax.swing.event.EventListenerList listenerList = new javax.swing.event.EventListenerList();
+    // This methods allows classes to register for MyEvents 
+
+    public void addMyEventListener(MyEventListener listener) {
+        listenerList.add(MyEventListener.class, listener);
+    }
+    // This methods allows classes to unregister for MyEvents
+
+    public void removeMyEventListener(MyEventListener listener) {
+        listenerList.remove(MyEventListener.class, listener);
+    }
+
+    void fireMyEvent(MyEvent evt) {
+        Object[] listeners = listenerList.getListenerList();
+        // Each listener occupies two elements - the first is the listener class
+        // and the second is the listener instance
+        for (int i = 0; i < listeners.length; i += 2) {
+            if (listeners[i] == MyEventListener.class) {
+                ((MyEventListener) listeners[i + 1]).MyEvent(evt);
+            }
+        }
+    }
+	
+
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/Fragment.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,88 @@
+package opennlp.tools.apps.contentgen.multithreaded;
+
+import opennlp.tools.similarity.apps.utils.StringDistanceMeasurer;
+
+
+public class Fragment {
+	
+		public String resultText;      // result
+		public double score;
+		public String fragment; // original
+		public String sourceURL;
+
+		Fragment(String text, double score) {
+			this.resultText = text;
+			this.score = score;
+		}
+		
+			
+		public String getResultText() {
+			return resultText;
+		}
+
+		public void setResultText(String resultText) {
+			this.resultText = resultText;
+		}
+
+
+
+		public double getScore() {
+			return score;
+		}
+
+
+
+		public void setScore(double score) {
+			this.score = score;
+		}
+
+
+
+		public String getFragment() {
+			return fragment;
+		}
+
+
+
+		public void setFragment(String fragment) {
+			this.fragment = fragment;
+		}
+
+		
+
+		public String getSourceURL() {
+			return sourceURL;
+		}
+
+
+		public void setSourceURL(String sourceURL) {
+			this.sourceURL = sourceURL;
+		}
+
+
+		public String toString(){
+			return this.resultText;
+		}
+
+		@Override
+		public boolean equals(Object o) {
+			if (this == o) return true;
+			if (o == null || getClass() != o.getClass()) return false;
+
+			Fragment fragment = (Fragment) o;
+
+			if (resultText == null && fragment.resultText == null) {
+				return true;
+			} else if ((resultText == null && fragment.resultText != null) || (resultText != null && fragment.resultText == null)) {
+				return false;
+			}
+
+			StringDistanceMeasurer sdm = new StringDistanceMeasurer();
+			return sdm.measureStringDistance(resultText, fragment.resultText) > 0.8;
+		}
+
+		@Override
+		public int hashCode() {
+			return resultText != null ? resultText.hashCode() : 0;
+		}
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEvent.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,12 @@
+package opennlp.tools.apps.contentgen.multithreaded;
+
+import java.util.EventObject;
+
+/**
+ * Event fired by a query runner once its search has completed. It carries no
+ * state of its own beyond the source inherited from {@link EventObject}.
+ */
+public class MyEvent extends EventObject {
+
+	/**
+	 * @param arg0 the object on which the event occurred
+	 */
+	public MyEvent(Object arg0) {
+		super(arg0);
+	}
+
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/MyEventListener.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,8 @@
+package opennlp.tools.apps.contentgen.multithreaded;
+
+import java.util.EventListener;
+
+
+/**
+ * Callback interface for objects that want to receive {@link MyEvent}s.
+ */
+public interface MyEventListener extends EventListener {
+
+	/**
+	 * Called with the event that was fired.
+	 *
+	 * @param evt the fired event
+	 */
+	void MyEvent(MyEvent evt);
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/cgRequestForm.html Mon Jan  6 17:48:30 2014
@@ -0,0 +1,37 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+ 
+<html xmlns='http://www.w3.org/1999/xhtml'>
+   <head >
+      <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>
+      <title >Submit Your Essay Writing request here</title>
+   </head>
+<body>
+<h1>Submit Your Essay Writing request here / Envie su solicitud ensayo escrito aqui</h1>
+ 
+<!--
+  Request form for the content generation handler. The form posts the topic,
+  recipient e-mail, language and the two Bing call limits to the URL below.
+  SECURITY NOTE(review): the action URL embeds a fixed host address, server-side
+  directory paths and a Bing API key in clear text. Anyone who can read this
+  page can read the key; consider supplying it on the server side instead.
+  The ampersands in the URL are written as &amp; so the attribute is
+  well-formed XHTML; browsers decode them back to plain ampersands.
+-->
+<form id='sampleform' method='post' action='http://173.255.254.250:8983/solr/contentgen/?resourceDir=/home/solr/solr-4.4.0/example/src/test/resources&amp;workDir=/home/solr/solr-4.4.0/example/solr-webapp/webapp/WEB-INF/lib&amp;relevanceThreshold=0.5&amp;bingKey=e8ADxIjn9YyHx36EihdjH/tMqJJItUrrbPTUpKahiU0=' >
+   <p>
+   Topic for your essay/Tema de su ensayo: <input type='text' name='q' value='albert einstein' size='35' maxlength='100'/>
+   </p>
+   <p>
+   Email to receive your essay/para recibir su ensayo: <input type='text' name='email' />
+   </p>
+   
+   <p>
+   Select language/seleccionar el idioma: <select name="lang" >
+   		<option value="en-US"> English</option>
+ 		<option value="es-US"> Espaniol</option>
+ 		<option value="de-DE"> German</option>
+	</select>
+	</p>
+	<p>
+   Number of Bing calls to write this essay: <input type='text' name='stepsNum' value='20' size='5' maxlength='10'/>
+   Number of Bing search results for each call to use for writing: <input type='text' name='searchResultsNum' value='100' size='5' maxlength='10'/>
+   </p>
+<p>
+   <input type='submit' name='Submit' value='Submit/presentar' />
+   </p>
+</form>
+ 
+</body>
+</html>

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/contentgen/multithreaded/nlProg2codeRequestForm.html Mon Jan  6 17:48:30 2014
@@ -0,0 +1,47 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
+"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
+ 
+<html xmlns='http://www.w3.org/1999/xhtml'>
+   <head >
+      <meta http-equiv='Content-Type' content='text/html; charset=utf-8'/>
+      <title >Submit Your Code Writing request here</title>
+   </head>
+<body>
+<h1>Submit Your Code Writing request here</h1>
+ 
+<!--
+  Request form for the natural-language-to-code handler. It posts to a fixed
+  host address. Every text box below uses the same field name, 'line', so the
+  request carries one 'line' value per box, in document order. The first two
+  boxes are pre-filled with example statements; the remaining seven are empty.
+-->
+<form id='sampleform' method='post' action='http://173.255.254.250:8983/solr/nlprog2code/?' >
+   <p>
+   Write what you want your program to do in natural language <input type='text' name='line' value='define a class named ...' size='35' maxlength='120'/>
+   </p>
+    <p>
+    <input type='text' name='line' value='define a function taking a string s1 and an integer i2 ' size='35' maxlength='150'/>
+   </p>
+   <p>
+     <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   <p>
+    <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   <p>
+    <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   <p>
+     <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   <p>
+    <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   <p>
+    <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   <p>
+    <input type='text' name='line' size='35' maxlength='200'/>
+   </p>
+   
+<p>
+   <input type='submit' name='Submit' value='Submit' />
+   </p>
+</form>
+ 
+</body>
+</html>

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/POStags.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,74 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+/**
+ * String constants for the constituent and part-of-speech labels used by the
+ * relevance vocabulary code. Fields of an interface are implicitly
+ * {@code public static final}, so the modifiers are not spelled out.
+ */
+public interface POStags {
+
+	// Project-specific additions for infinitive and participle phrases.
+	String TYPE_STP = "STP"; // infinitive phrase
+	String TYPE_SGP = "SGP"; // present participle phrase
+	String TYPE_SNP = "SNP"; // past participle phrase
+
+	// Standard POS types, see
+	// http://bulba.sdsu.edu/jeanette/thesis/PennTags.html
+	String TYPE_ADJP = "ADJP";
+	String TYPE_ADVP = "ADVP";
+	String TYPE_CC = "CC";
+	String TYPE_CD = "CD";
+	String TYPE_CONJP = "CONJP";
+	String TYPE_DT = "DT";
+	String TYPE_EX = "EX";
+	String TYPE_FRAG = "FRAG";
+	String TYPE_FW = "FW";
+	String TYPE_IN = "IN";
+	String TYPE_INTJ = "INTJ";
+	String TYPE_JJ = "JJ";
+	String TYPE_JJR = "JJR";
+	String TYPE_JJS = "JJS";
+	String TYPE_LS = "LS";
+	String TYPE_LST = "LST";
+	String TYPE_MD = "MD";
+	String TYPE_NAC = "NAC";
+	String TYPE_NN = "NN";
+	String TYPE_NNS = "NNS";
+	String TYPE_NNP = "NNP";
+	String TYPE_NNPS = "NNPS";
+	String TYPE_NP = "NP";
+	String TYPE_NX = "NX";
+	String TYPE_PDT = "PDT";
+	String TYPE_POS = "POS";
+	String TYPE_PP = "PP";
+	String TYPE_PRN = "PRN";
+	String TYPE_PRP = "PRP";
+	String TYPE_PRP$ = "PRP$";
+	String TYPE_PRT = "PRT";
+	String TYPE_QP = "QP";
+	String TYPE_RB = "RB";
+	String TYPE_RBR = "RBR";
+	String TYPE_RBS = "RBS";
+	String TYPE_RP = "RP";
+	String TYPE_RRC = "RRC";
+	String TYPE_S = "S";
+	String TYPE_SBAR = "SBAR";
+	String TYPE_SBARQ = "SBARQ";
+	String TYPE_SINV = "SINV";
+	String TYPE_SQ = "SQ";
+	String TYPE_SYM = "SYM";
+	String TYPE_TO = "TO";
+	String TYPE_TOP = "TOP";
+	String TYPE_UCP = "UCP";
+	String TYPE_UH = "UH";
+	String TYPE_VB = "VB";
+	String TYPE_VBD = "VBD";
+	String TYPE_VBG = "VBG";
+	String TYPE_VBN = "VBN";
+	String TYPE_VBP = "VBP";
+	String TYPE_VBZ = "VBZ";
+	String TYPE_VP = "VP";
+	String TYPE_WDT = "WDT";
+	String TYPE_WHADJP = "WHADJP";
+	String TYPE_WHADVP = "WHADVP";
+	String TYPE_WHNP = "WHNP";
+	String TYPE_WHPP = "WHPP";
+	String TYPE_WP = "WP";
+	String TYPE_WP$ = "WP$";
+	String TYPE_WRB = "WRB";
+	String TYPE_X = "X";
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/PhraseProcessor.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,215 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+
+import opennlp.tools.parser.Parse;
+import opennlp.tools.textsimilarity.ParseTreeChunk;
+import opennlp.tools.textsimilarity.TextProcessor;
+import opennlp.tools.textsimilarity.chunker2matcher.ParserChunker2MatcherProcessor;
+import opennlp.tools.util.Span;
+
+public class PhraseProcessor {
+	
+	private ParserChunker2MatcherProcessor nlProc = ParserChunker2MatcherProcessor.getInstance() ;
+	
+	public static boolean allChildNodesArePOSTags(Parse p)
+	{
+		Parse[] subParses = p.getChildren();
+		for (int pi = 0; pi < subParses.length; pi++)
+			if (!((Parse) subParses[pi]).isPosTag())
+				return false;
+		return true;
+	}
+	
+	public ArrayList<String> getNounPhrases(Parse p)
+	{
+		ArrayList<String> nounphrases = new ArrayList<String>();
+
+		Parse[] subparses = p.getChildren();
+		for (int pi = 0; pi < subparses.length; pi++)
+		{
+
+			if (subparses[pi].getType().equals("NP") && allChildNodesArePOSTags(subparses[pi]))
+			{
+				Span _span = subparses[pi].getSpan();
+				nounphrases.add(p.getText().substring(_span.getStart(), _span.getEnd()));
+			}
+			else if (!((Parse) subparses[pi]).isPosTag())
+				nounphrases.addAll(getNounPhrases(subparses[pi]));
+		}
+
+		return nounphrases;
+	}
+	
+	public ArrayList<String> getVerbPhrases(Parse p)
+	{
+		ArrayList<String> verbPhrases = new ArrayList<String>();
+
+		Parse[] subparses = p.getChildren();
+		for (int pi = 0; pi < subparses.length; pi++)
+		{
+
+			if (subparses[pi].getType().startsWith("VB") && allChildNodesArePOSTags(subparses[pi]))
+			{
+				Span _span = subparses[pi].getSpan();
+				verbPhrases.add(p.getText().substring(_span.getStart(), _span.getEnd()));
+			}
+			else if (!((Parse) subparses[pi]).isPosTag())
+				verbPhrases.addAll(getNounPhrases(subparses[pi]));
+		}
+
+		return verbPhrases;
+	}
+	
+	// forms phrases from text which are candidate expressions for events lookup
+			public List<ParseTreeChunk> getVerbPhrases(String sentence) {
+				if (sentence==null)
+					return null;
+				if (sentence.split(" ").length ==1) { // this is a word, return empty
+					//queryArrayStr.add( sentence);
+					return null;
+				}
+				if (sentence.length()>100)
+					return null ; // too long of a sentence to parse
+				
+				System.out.println("About to parse: "+sentence);
+				List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence); 
+				if (groupedChunks.size()<1)
+					return null;
+
+				List<ParseTreeChunk> vPhrases = groupedChunks.get(1);
+				
+				return vPhrases;
+			}
+
+			public List<List<ParseTreeChunk>> getPhrasesOfAllTypes(String sentence) {
+				if (sentence==null)
+					return null;
+				if (sentence.split(" ").length ==1) { // this is a word, return empty
+					//queryArrayStr.add( sentence);
+					return null;
+				}
+				if (sentence.length()>200)
+					return null ; // too long of a sentence to parse
+				
+				System.out.println("About to parse: "+sentence);
+				List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence); 
+				if (groupedChunks.size()<1)
+					return null;
+
+				return groupedChunks;
+			}
+	
+	// forms phrases from text which are candidate expressions for events lookup
+		public List<String> extractNounPhraseProductNameCandidate(String sentence) {
+			
+			List<String> queryArrayStr = new ArrayList<String>();
+			
+			if (sentence.split(" ").length ==1) { // this is a word, return empty
+				//queryArrayStr.add( sentence);
+				return queryArrayStr;
+			}
+			String quoted1 = StringUtils.substringBetween(sentence, "\"", "\"");
+			String quoted2 = StringUtils.substringBetween(sentence, "\'", "\'");
+			List<List<ParseTreeChunk>> groupedChunks = nlProc.formGroupedPhrasesFromChunksForPara(sentence); 
+			if (groupedChunks.size()<1)
+				return queryArrayStr;
+
+			List<ParseTreeChunk> nPhrases = groupedChunks.get(0);
+
+			for (ParseTreeChunk ch : nPhrases) {
+				String query = "";
+				int size = ch.getLemmas().size();
+				boolean phraseBeingFormed = false;
+				for (int i = 0; i < size; i++) {
+					if ((ch.getPOSs().get(i).startsWith("N") || ch.getPOSs().get(i)
+							.startsWith("J") || ch.getPOSs().get(i).startsWith("CD") ) )
+					//		&& StringUtils.isAlpha(ch.getLemmas().get(i)))
+					{
+						query += ch.getLemmas().get(i) + " ";
+						phraseBeingFormed = true;
+					} else 
+						if ((ch.getPOSs().get(i).startsWith("PR") || ch.getPOSs().get(i).startsWith("IN") || ch.getPOSs().get(i).startsWith("TO")  ) 
+								&& phraseBeingFormed )
+							break;
+						else if (ch.getPOSs().get(i).startsWith("DT") || ch.getPOSs().get(i).startsWith("CC"))
+						continue;
+				}
+				query = query.trim();
+				int len = query.split(" ").length;
+				if (len > 5 || len < 2) // too long or too short
+					continue;
+				
+	/*				
+				if (len < 4 && len>1) { // every word should start with capital
+					String[] qs = query.split(" ");
+					boolean bAccept = true;
+					for (String w : qs) {
+						if (w.toLowerCase().equals(w)) // idf only two words then
+														// has to be person name,
+														// title or geo
+														// location
+							bAccept = false;
+					}
+					if (!bAccept)
+						continue;
+				}
+		*/		
+				 // individual word, possibly a frequent word
+				// if len==1 do nothing
+
+				query = query.trim();
+				queryArrayStr.add(query);
+
+			}
+	/*		
+			if (queryArrayStr.size() < 1) { // release constraints on NP down to 2
+											// keywords
+				for (ParseTreeChunk ch : nPhrases) {
+					String query = "";
+					int size = ch.getLemmas().size();
+
+					for (int i = 0; i < size; i++) {
+						if (ch.getPOSs().get(i).startsWith("N")
+								|| ch.getPOSs().get(i).startsWith("J")) {
+							query += ch.getLemmas().get(i) + " ";
+						}
+					}
+					query = query.trim();
+					int len = query.split(" ").length;
+					if (len < 2)
+						continue;
+
+					query = TextProcessor.fastTokenize(query.toLowerCase(), false)
+							.toString().replace('[', ' ').replace(']', ' ').trim();
+					if (query.length() > 6)
+						queryArrayStr.add(query);
+				}
+			}
+			//queryArrayStr = Utils
+			//		.removeDuplicatesFromQueries(queryArrayStr);
+			if (quoted1 != null
+					&& ((quoted1.length() > 5 && !stopList.isCommonWord(quoted1)) || quoted1
+							.length() > 10))
+				queryArrayStr.add(quoted1);
+			if (quoted2 != null
+					&& ((quoted2.length() > 5 && !stopList.isCommonWord(quoted2)) || quoted2
+							.length() > 10))
+				queryArrayStr.add(quoted2);
+		*/	return queryArrayStr;
+		}
+		
+
+	
+		
+		public static void main(String[] args){
+			String sent = "Appliances and Kitchen Gadgets - CNET Blogs";
+					//"The tablet phenomenon turns Silicon Valley upside down - SiliconValley.com";
+			List<String> res = new PhraseProcessor().extractNounPhraseProductNameCandidate(sent);
+			System.out.println(res);
+		}
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SentimentVocab.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,199 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+
+
+/**
+ * Singleton vocabulary of sentiment-bearing words. Each word is mapped to a
+ * {@link Sentiment} holding the POS type of the list it came from and a
+ * polarity ({@link #SENTIMENT_POSITIVE} or {@link #SENTIMENT_NEGATIVE}).
+ * <p>
+ * The word lists are registered in a fixed order and a word that occurs in
+ * more than one list (for example "down" or "complain") keeps the entry of
+ * the list registered last in the word-to-sentiment map.
+ */
+public class SentimentVocab {
+	// NOTE(review): the lists below are kept exactly as committed. Some entries
+	// look questionable ("jealous" among positive adjectives, "disgust" among
+	// positive verbs) and some are misspelled ("trubled", "unpleasent",
+	// "upseting", "deceptfully"); confirm with the vocabulary owner before editing.
+	private static final String[] POSITIVE_ADJECTIVE_LIST = { "accessible",
+			"advanced", "affordable", "amazing", "awesome", "beautiful",
+			"brilliant", "capable", "classic", "clear", "comfortable",
+			"convenient", "cool", "courteous", "cute", "decent", "delight",
+			"easy", "elegant", "enjoyable", "enough", "excellent",
+			"exceptional", "fabulous", "fancy", "fantastic", "fast",
+			"favorable", "fine", "friendly", "fun", "good", "great", "handy",
+			"happy", "hefty", "helpful", "high", "immaculate", "impressive",
+			"incredible", "interesting", "jealous", "lovely", "lucky",
+			"luxurious", "marvelous", "maximum", "memorable", "neat", "nice",
+			"outstanding", "perfect", "pleasant", "positive", "pretty",
+			"powerful", "quiet", "reasonable", "remarkable", "right", "safe",
+			"silky", "sleek", "slick", "stylish", "suitable", "superb",
+			"tasteful", "terrific", "top", "unbelievable", "useful",
+			"welcoming", "wonderful", "worthwhile" };
+
+	private static final String[] NEGATIVE_ADJECTIVE_LIST = { "angry",
+			"annoyed", "annoying", "anxious", "arrogant", "ashamed", "awful",
+			"bad", "bored", "boring", "broke", "broken", "clumsy",
+			"complicate", "complicated", "confused", "cranky", "crazy",
+			"cumbersome", "defective", "depressed", "dead", "depressing",
+			"difficult", "dirty", "disappointed", "disappointing", "disgusted",
+			"disgusting", "disheartened", "disheartening", "dissatisfactory",
+			"dissatisfying", "distant", "disturbed", "dizzy", "doubtful",
+			"down", "drab", "dull", "dysfunctional", "embarrassed", "evil",
+			"exhausted", "fatal", "filthy", "flawed", "fragile", "frightened",
+			"frustrating", "goofy", "grieving", "hard", "horrific",
+			"horrifying", "harsh", "horrible", "impossible", "inconvenient",
+			"insane", "lack", "lacking", "lazy", "leaking", "leaky", "lonely",
+			"low", "mediocre", "messy", "mysterious", "nasty", "naughty",
+			"negative", "noisy", "nonclean", "nutty", "outdated", "outrageous",
+			"over priced", "pathetic", "poor", "premature", "pricey", "pricy",
+			"problematic", "putrid", "puzzled", "rickety", "ridiculous",
+			"ripped off", "rugged", "slow", "stinky", "strange", "stupid",
+			"sweaty", "tedious", "terrible", "tired", "tough", "toxic",
+			"trubled", "ugly", "unbearable", "unclean", "uncomfortable",
+			"unfortunate", "unhelpful", "uninviting", "unpleasent",
+			"unsanitary", "upseting", "unusable", "weird", "worn", "worn down",
+			"wretched", "wrong" };
+
+	private static final String[] POSITIVE_ADVERB_LIST = { "absolutely",
+			"amazingly", "completely", "definitely", "easily", "fairly",
+			"highly", "immensely", "incredibly", "nicely", "really", "rich",
+			"simply", "surprisingly", "tastefully", "totally", "truly", "very",
+			"well" };
+
+	private static final String[] NEGATIVE_ADVERB_LIST = { "badly",
+			"deceptfully", "down", "horribly", "oddly", "pathetically",
+			"terribly", "too", "unfortunately" };
+
+	private static final String[] POSITIVE_NOUN_LIST = { "ability", "benefit",
+			"character", "charm", "comfort", "discount", "dream", "elegance",
+			"favourite", "feature", "improvement", "luck", "luxury", "offer",
+			"pro", "quality", "requirement", "usability" };
+
+	private static final String[] NEGATIVE_NOUN_LIST = { "blocker",
+			"challenge", "complain", "complaint", "compromise", "con",
+			"concern", "crap", "disappointment", "disillusion", "doubt",
+			"downside", "drawback", "embarrassment", "error", "failure",
+			"fault", "garbage", "glitch", "inability", "issue", "junk",
+			"long line", "malfunction", "mess", "mistake", "nightmare",
+			"noise", "odor", "pain", "pitfall", "problem", "rip off", "roach",
+			"rude", "sacrifice", "shame", "shock", "stain", "threat",
+			"trouble", "urine", "worry" };
+
+	private static final String[] POSITIVE_VERB_LIST = { "admire", "amaze",
+			"assist", "disgust", "enjoy", "help", "guarantee", "impress",
+			"improve", "like", "love", "patronize", "prefer", "recommend",
+			"want" };
+
+	private static final String[] NEGATIVE_VERB_LIST = { "annoy", "appall",
+			"break", "complain", "confuse", "depress", "disappoint",
+			"dishearten", "dislike", "dissatisfy", "embarrass", "fail", "fear",
+			"flaw", "frustrate", "hate", "ruin", "scare", "stink", "suck",
+			"think twice", "thwart", "upset", "vomit" };
+
+	public static final int SENTIMENT_POSITIVE = 1;
+	public static final int SENTIMENT_UNKNOWN = 0;
+	public static final int SENTIMENT_NEGATIVE = -1;
+
+	// Must stay below the word lists: static initializers run in textual order
+	// and the constructor reads those arrays.
+	private static final SentimentVocab INSTANCE = new SentimentVocab();
+
+	// complete sentiment word map, key = word, value = sentiment object
+	private final Map<String, Sentiment> sentimentMap = new HashMap<String, Sentiment>();
+
+	// sentiment word sets, key = POS type, value = word set
+	private final Map<String, HashSet<String>> wordSetMap = new HashMap<String, HashSet<String>>();
+
+	/** POS type and polarity recorded for one vocabulary word. */
+	public static class Sentiment {
+		public String posType;
+		public int sentimentType;
+
+		Sentiment(String posType, int sentimentType) {
+			this.posType = posType;
+			this.sentimentType = sentimentType;
+		}
+	}
+
+	/**
+	 * @return the shared vocabulary instance
+	 */
+	public static SentimentVocab getInstance() {
+		return INSTANCE;
+	}
+
+	/**
+	 * Looks the word up exactly as given (no lemmatization is applied).
+	 *
+	 * @param word word to look up, may be {@code null}
+	 * @return the sentiment entry, or {@code null} if the word is {@code null} or unknown
+	 */
+	public Sentiment getSentiment(String word) {
+		if (word == null)
+			return null;
+
+		// get the normalized form of the word
+		//word = WordDictionary.getInstance().getLemmaOrWord(word);
+
+		return sentimentMap.get(word);
+	}
+
+	/**
+	 * Normalizes the word through {@code WordDictionary.getLemmaOrWord(word, posType)}
+	 * and looks the result up.
+	 *
+	 * @param word    word to look up, may be {@code null}
+	 * @param posType POS type handed to the dictionary for normalization
+	 * @return the sentiment entry, or {@code null} if the word is {@code null} or unknown
+	 */
+	public Sentiment getSentiment(String word, String posType) {
+		if (word == null)
+			return null;
+
+		// get the normalized form of the word
+		word = WordDictionary.getInstance().getLemmaOrWord(word, posType);
+
+		return sentimentMap.get(word);
+	}
+
+	/**
+	 * @param word word to test
+	 * @return {@code true} if {@link #getSentiment(String)} finds an entry
+	 */
+	public boolean isSentimentWord(String word) {
+		return (getSentiment(word) != null);
+	}
+
+	/**
+	 * @param word    word to test
+	 * @param posType POS type the entry must have
+	 * @return {@code true} if {@link #getSentiment(String, String)} finds an
+	 *         entry whose POS type equals {@code posType}
+	 */
+	public boolean isSentimentWord(String word, String posType) {
+		Sentiment sentiment = getSentiment(word, posType);
+		if (sentiment == null)
+			return false;
+
+		// Compare by value: the previous reference comparison (==) only matched
+		// when the caller passed the very same String object as the POStags constant.
+		return sentiment.posType != null && sentiment.posType.equals(posType);
+	}
+
+	/**
+	 * @param posType POS type key, may be {@code null}
+	 * @return the internal word set registered for that POS type (not a copy),
+	 *         or {@code null} if {@code posType} is {@code null} or has no words
+	 */
+	public HashSet<String> getSentimentWordSet(String posType) {
+		if (posType == null)
+			return null;
+
+		return wordSetMap.get(posType);
+	}
+
+	/**
+	 * @param sentimentType one of the {@code SENTIMENT_*} constants
+	 * @return "positive", "negative", or "unknown" for any other value
+	 */
+	public static String getSentimentName(int sentimentType) {
+		switch (sentimentType) {
+		case SENTIMENT_POSITIVE:
+			return "positive";
+		case SENTIMENT_NEGATIVE:
+			return "negative";
+		default:
+			return "unknown";
+		}
+	}
+
+	private SentimentVocab() {
+		// populate the sentiment map; the order matters because a later list
+		// overwrites the map entry of a word already registered by an earlier one
+		addWordsToSentimentMap(POSITIVE_ADJECTIVE_LIST,
+				POStags.TYPE_JJ, SENTIMENT_POSITIVE);
+		addWordsToSentimentMap(NEGATIVE_ADJECTIVE_LIST,
+				POStags.TYPE_JJ, SENTIMENT_NEGATIVE);
+		addWordsToSentimentMap(POSITIVE_ADVERB_LIST, POStags.TYPE_RB,
+				SENTIMENT_POSITIVE);
+		addWordsToSentimentMap(NEGATIVE_ADVERB_LIST, POStags.TYPE_RB,
+				SENTIMENT_NEGATIVE);
+		addWordsToSentimentMap(POSITIVE_NOUN_LIST, POStags.TYPE_NN,
+				SENTIMENT_POSITIVE);
+		addWordsToSentimentMap(NEGATIVE_NOUN_LIST, POStags.TYPE_NN,
+				SENTIMENT_NEGATIVE);
+		addWordsToSentimentMap(POSITIVE_VERB_LIST, POStags.TYPE_VB,
+				SENTIMENT_POSITIVE);
+		addWordsToSentimentMap(NEGATIVE_VERB_LIST, POStags.TYPE_VB,
+				SENTIMENT_NEGATIVE);
+	}
+
+	/**
+	 * Registers every word in the word-to-sentiment map and in the word set
+	 * kept for the given POS type.
+	 */
+	private void addWordsToSentimentMap(String[] words, String posType,
+			int sentimentType) {
+
+		// fetch (or create) the word set of this POS type
+		HashSet<String> wordSet = wordSetMap.get(posType);
+		if (wordSet == null) {
+			wordSet = new HashSet<String>();
+			wordSetMap.put(posType, wordSet);
+		}
+
+		for (String word : words) {
+			// complete sentiment word map
+			sentimentMap.put(word, new Sentiment(posType, sentimentType));
+			// per-POS sentiment word set
+			wordSet.add(word);
+		}
+	}
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymListFilter.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,88 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+
+/**
+ * Picks a random synonym for a word from a {@link SynonymMap} loaded from the
+ * WordNet prolog file {@code wn_s.pl}.
+ */
+public class SynonymListFilter {
+	/** Name of the WordNet prolog file expected in the resource directory. */
+	private static final String WORDNET_FILE_NAME = "wn_s.pl";
+
+	/** Loaded synonym table; stays {@code null} when loading failed. */
+	SynonymMap map=null;
+
+	/**
+	 * Loads {@code wn_s.pl} from the given directory. The substring
+	 * "maps/analytics" is removed from the directory before it is used. A load
+	 * failure is reported on standard error and leaves the filter without a
+	 * map, in which case {@link #getSynonym(String)} returns {@code null}.
+	 *
+	 * @param dir directory containing {@code wn_s.pl}, with or without a trailing separator
+	 */
+	public SynonymListFilter(String dir){
+		dir = dir.replace("maps/analytics","");
+		// Resolve through File so that a directory without a trailing separator
+		// works too; plain concatenation produced e.g. ".../resourceswn_s.pl".
+		// An empty directory keeps meaning "relative to the working directory".
+		File wordNetFile = dir.length() == 0
+				? new File(WORDNET_FILE_NAME)
+				: new File(dir, WORDNET_FILE_NAME);
+		try {
+			// SynonymMap closes the stream after reading it
+			map = new SynonymMap( new FileInputStream(wordNetFile));
+		} catch (IOException e) { // also covers FileNotFoundException
+			System.err.println("Cannot load synonyms from " + wordNetFile + ": " + e);
+			e.printStackTrace();
+		}
+	}
+
+	protected static Map<String, List<String>> filteredKeyword_synonyms = new HashMap<String, List<String>>();
+
+	/**
+	 * Reads a file line by line. When a line contains ';' at index 3 or later,
+	 * only the text between the first character and the character before the
+	 * ';' (both excluded) is kept; other lines are kept unchanged.
+	 *
+	 * @param aFile file to read
+	 * @return the (possibly trimmed) lines read so far; an I/O error is printed
+	 *         and ends the reading, it is not propagated
+	 */
+	static public List<String> getFileLines(File aFile) {
+
+		List<String> items = new ArrayList<String>();
+
+		try {
+			BufferedReader input =  new BufferedReader(new FileReader(aFile));
+			try {
+				String line;
+				while (( line = input.readLine()) != null){
+					int endOfWord = line.indexOf(';');
+					if (endOfWord>2)
+						line = line.substring(1, endOfWord -1 );
+
+					items.add(line);
+				}
+			}
+			finally {
+				input.close();
+			}
+		}
+		catch (IOException ex){
+			ex.printStackTrace();
+		}
+
+		return items;
+	}
+
+	/**
+	 * Returns one randomly chosen synonym of the word and prints the choice to
+	 * standard output.
+	 *
+	 * @param word word to look up
+	 * @return a synonym, or {@code null} if none is known or the synonym map could not be loaded
+	 */
+	public String getSynonym (String word){
+		if (map == null)
+			return null; // loading failed in the constructor
+		String[] synonyms = map.getSynonyms(word);
+		if (synonyms==null || synonyms.length<1)
+			return null;
+		// Math.random() is in [0,1), so the index is always within bounds
+		int index = (int) Math.floor(Math.random()*(double)synonyms.length);
+		System.out.println("Found synonyms "+Arrays.asList(synonyms).toString()+ " | selected synonym = "+synonyms[index] +" | for the input = "+ word);
+		return synonyms[index];
+	}
+
+	/** Small manual check; the lookups print their result themselves. */
+	public static void main(String[] args){
+		// NOTE(review): this is an absolute path from the file system root - confirm it is intended
+		SynonymListFilter filter = new  SynonymListFilter("/src/test/resources");
+		filter.getSynonym("bring");
+		filter.getSynonym("yell");
+	}
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/SynonymMap.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,379 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.io.IOException;
+  import java.io.InputStream;
+   import java.nio.ByteBuffer;
+   import java.nio.charset.Charset;
+   import java.util.ArrayList;
+   import java.util.Arrays;
+   import java.util.HashMap;
+   import java.util.Iterator;
+   import java.util.Map;
+   import java.util.TreeMap;
+   import java.util.TreeSet;
+   
+   /**
+    * Loads the <a target="_blank" 
+    * href="http://www.cogsci.princeton.edu/~wn/">WordNet </a> prolog file <a
+    * href="http://www.cogsci.princeton.edu/2.0/WNprolog-2.0.tar.gz">wn_s.pl </a>
+    * into a thread-safe main-memory hash map that can be used for fast
+    * high-frequency lookups of synonyms for any given (lowercase) word string.
+    * <p>
+    * There holds: If B is a synonym for A (A -> B) then A is also a synonym for B (B -> A).
+    * There does not necessarily hold: A -> B, B -> C then A -> C.
+    * <p>
+    * Loading typically takes some 1.5 secs, so should be done only once per
+    * (server) program execution, using a singleton pattern. Once loaded, a
+    * synonym lookup via {@link #getSynonyms(String)} takes constant time O(1).
+    * A loaded default synonym map consumes about 10 MB main memory.
+    * An instance is immutable, hence thread-safe.
+    * <p>
+    * This implementation borrows some ideas from the Lucene Syns2Index demo that 
+    * Dave Spencer originally contributed to Lucene. Dave's approach
+    * involved a persistent Lucene index which is suitable for occasional
+    * lookups or very large synonym tables, but considered unsuitable for 
+    * high-frequency lookups of medium size synonym tables.
+    * <p>
+    * Example Usage:
+    * <pre>
+    * String[] words = new String[] { "hard", "woods", "forest", "wolfish", "xxxx"};
+    * SynonymMap map = new SynonymMap(new FileInputStream("samples/fulltext/wn_s.pl"));
+    * for (int i = 0; i &lt; words.length; i++) {
+    *     String[] synonyms = map.getSynonyms(words[i]);
+    *     System.out.println(words[i] + ":" + java.util.Arrays.asList(synonyms).toString());
+    * }
+    * 
+    * Example output:
+    * hard:[arduous, backbreaking, difficult, fermented, firmly, grueling, gruelling, heavily, heavy, intemperately, knockout, laborious, punishing, severe, severely, strong, toilsome, tough]
+    * woods:[forest, wood]
+   * forest:[afforest, timber, timberland, wood, woodland, woods]
+    * wolfish:[edacious, esurient, rapacious, ravening, ravenous, voracious, wolflike]
+    * xxxx:[]
+    * </pre>
+    *
+    * @see <a target="_blank"
+    *      href="http://www.cogsci.princeton.edu/~wn/man/prologdb.5WN.html">prologdb
+    *      man page </a>
+    * @see <a target="_blank" href="http://www.hostmon.com/rfc/advanced.jsp">Dave's synonym demo site</a>
+    */
+   public class SynonymMap {
+   
+     /** the index data; Map<String word, String[] synonyms> */
+     private final HashMap<String,String[]> table;
+     
+     /** shared result for words without synonyms */
+     private static final String[] EMPTY = new String[0];
+     
+     /** enables the gc/trace output in {@link #optimize} */
+     private static final boolean DEBUG = false;
+   
+     /**
+      * Constructs an instance, loading WordNet synonym data from the given input
+      * stream. Finally closes the stream. The words in the stream must be in
+      * UTF-8 or a compatible subset (for example ASCII).
+      * 
+      * @param input
+      *            the stream to read from (null indicates an empty synonym map)
+      * @throws IOException
+      *             if an error occurred while reading the stream.
+      */
+     public SynonymMap(InputStream input) throws IOException {
+       this.table = input == null ? new HashMap<String,String[]>(0) : read(toByteArray(input));
+     }
+     
+     /**
+      * Returns the synonym set for the given word, sorted ascending.
+      * 
+      * @param word
+      *            the word to lookup (must be in lowercase).
+      * @return the synonyms; a set of zero or more words, sorted ascending, each
+      *         word containing lowercase characters that satisfy
+      *         <code>Character.isLetter()</code>. The array is a fresh copy
+      *         (or a shared empty array), so callers may modify it freely.
+      */
+     public String[] getSynonyms(String word) {
+       String[] synonyms = table.get(word);
+       if (synonyms == null) return EMPTY;
+       return synonyms.clone(); // copy for guaranteed immutability
+     }
+     
+     /**
+      * Returns a String representation of the index data for debugging purposes:
+      * one "word:[synonyms]" line per word in ascending word order, followed by
+      * the number of keys, the total number of synonyms and how many words have
+      * exactly 0, 1, 2 and 3 synonyms.
+      * 
+      * @return a String representation
+      */
+     @Override
+     public String toString() {
+       StringBuilder buf = new StringBuilder();
+       int count = 0;
+       int[] freq = new int[4]; // freq[k] = number of words with exactly k synonyms
+       
+       for (String word : new TreeMap<String,String[]>(table).keySet()) {
+         String[] synonyms = getSynonyms(word);
+         buf.append(word).append(':').append(Arrays.asList(synonyms)).append('\n');
+         count += synonyms.length;
+         if (synonyms.length < freq.length) freq[synonyms.length]++;
+       }
+       
+       buf.append("\n\nkeys=").append(table.size())
+          .append(", synonyms=").append(count)
+          .append(", f0=").append(freq[0])
+          .append(", f1=").append(freq[1])
+          .append(", f2=").append(freq[2])
+          .append(", f3=").append(freq[3]);
+       return buf.toString();
+     }
+     
+     /**
+      * Analyzes/transforms the given word on input stream loading. This default implementation simply
+      * lowercases the word. Override this method with a custom stemming
+      * algorithm or similar, if desired.
+      * <p>
+      * Lowercasing uses a fixed English locale so that the resulting keys do not
+      * depend on the JVM default locale (under a Turkish default locale
+      * "I" would otherwise become a dotless i).
+      * <p>
+      * Note that this method is invoked while the constructor is still running.
+      * 
+      * @param word
+      *            the word to analyze
+      * @return the same word, or a different word (or null to indicate that the
+      *         word should be ignored)
+      */
+     protected String analyze(String word) {
+       return word.toLowerCase(java.util.Locale.ENGLISH);
+     }
+   
+     /** Returns true if the string consists of letters only (true for the empty string). */
+     private static boolean isValid(String str) {
+       for (int i=str.length(); --i >= 0; ) {
+         if (!Character.isLetter(str.charAt(i))) return false;
+       }
+       return true;
+     }
+   
+     /**
+      * Parses the raw bytes of a wn_s.pl file. Each entry is expected to look
+      * like <code>s(groupNumber,n,'word',...</code>; the group number and the
+      * quoted word are extracted, phrases and non-alphabetic words are skipped,
+      * and the word-to-synonyms index is built from the remaining pairs.
+      */
+     private HashMap<String,String[]> read(byte[] data) {
+       int WORDS  = (int) (76401 / 0.7); // presizing
+       int GROUPS = (int) (88022 / 0.7); // presizing
+       HashMap<String,ArrayList<Integer>> word2Groups = new HashMap<String,ArrayList<Integer>>(WORDS);  // Map<String word, int[] groups>
+       HashMap<Integer,ArrayList<String>> group2Words = new HashMap<Integer,ArrayList<String>>(GROUPS); // Map<int group, String[] words>
+       HashMap<String,String> internedWords = new HashMap<String,String>(WORDS);// Map<String word, String word>
+   
+       Charset charset = Charset.forName("UTF-8");
+       int lastNum = -1;
+       Integer lastGroup = null;
+       int len = data.length;
+       int i=0;
+       
+       while (i < len) { // until EOF
+         /* Part A: Parse a line */
+         
+         // scan to beginning of group
+         while (i < len && data[i] != '(') i++;
+         if (i >= len) break; // EOF
+         i++;
+         
+         // parse group: decimal digits up to the first comma ('0' is byte value 48)
+         int num = 0;
+         while (i < len && data[i] != ',') {
+           num = 10*num + (data[i] - 48);
+           i++;
+         }
+         i++;
+   //      if (DEBUG) System.err.println("num="+ num);
+         
+         // scan to beginning of word
+         while (i < len && data[i] != '\'') i++;
+         i++;
+     
+         // scan to end of word
+         int start = i;
+         do {
+           while (i < len && data[i] != '\'') i++;
+           i++;
+         } while (i < len && data[i] != ','); // word must end with "',"
+         
+         if (i >= len) break; // EOF
+         // i points at the comma after the closing quote, hence the -1
+         String word = charset.decode(ByteBuffer.wrap(data, start, i-start-1)).toString();
+   //      String word = new String(data, 0, start, i-start-1); // ASCII
+         
+         /*
+          * Part B: ignore phrases (with spaces and hyphens) and
+          * non-alphabetic words, and let user customize word (e.g. do some
+          * stemming)
+          */
+         if (!isValid(word)) continue; // ignore
+         word = analyze(word);
+         if (word == null || word.length() == 0) continue; // ignore
+         
+         
+         /* Part C: Add (group,word) to tables */
+         
+         // ensure compact string representation, minimizing memory overhead;
+         // afterwards equal words are represented by one shared String object
+         String w = internedWords.get(word);
+         if (w == null) {
+           word = new String(word); // ensure compact string
+           internedWords.put(word, word);
+         } else {
+           word = w;
+         }
+         
+         // reuse the boxed group id while consecutive entries share the group number
+         Integer group = lastGroup;
+         if (num != lastNum) {
+           group = Integer.valueOf(num);
+           lastGroup = group;
+           lastNum = num;
+         }
+         
+         // add word --> group
+         ArrayList<Integer> groups =  word2Groups.get(word);
+         if (groups == null) {
+           groups = new ArrayList<Integer>(1);
+           word2Groups.put(word, groups);
+         }
+         groups.add(group);
+   
+         // add group --> word
+         ArrayList<String> words = group2Words.get(group);
+         if (words == null) {
+           words = new ArrayList<String>(1);
+           group2Words.put(group, words);
+         } 
+         words.add(word);
+       }
+       
+       
+       /* Part D: compute index data structure */
+       HashMap<String,String[]> word2Syns = createIndex(word2Groups, group2Words);    
+           
+       /* Part E: minimize memory consumption by a factor 3 (or so) */
+   //    if (true) return word2Syns;
+       word2Groups = null; // help gc
+       //TODO: word2Groups.clear(); would be more appropriate  ? 
+       group2Words = null; // help gc
+       //TODO: group2Words.clear(); would be more appropriate  ? 
+       
+       return optimize(word2Syns, internedWords);
+     }
+     
+     /**
+      * Builds the word-to-synonyms map: the synonyms of a word are all other
+      * words that share at least one group with it, sorted ascending. Words
+      * without any synonym get no entry.
+      */
+     private HashMap<String,String[]> createIndex(Map<String,ArrayList<Integer>> word2Groups, Map<Integer,ArrayList<String>> group2Words) {
+       HashMap<String,String[]> word2Syns = new HashMap<String,String[]>();
+       
+       for (final Map.Entry<String,ArrayList<Integer>> entry : word2Groups.entrySet()) { // for each word
+         ArrayList<Integer> group = entry.getValue();     
+         String word = entry.getKey();
+         
+         TreeSet<String> synonyms = new TreeSet<String>(); // sorted and duplicate free
+         for (int i=group.size(); --i >= 0; ) { // for each groupID of word
+           ArrayList<String> words = group2Words.get(group.get(i));
+           for (int j=words.size(); --j >= 0; ) { // add all words       
+             String synonym = words.get(j);
+             // a word is implicitly its own synonym; compared by value so that
+             // correctness does not hinge on both strings being interned
+             if (!synonym.equals(word)) {
+               synonyms.add(synonym);
+             }
+           }
+         }
+   
+         if (!synonyms.isEmpty()) {
+           word2Syns.put(word, synonyms.toArray(new String[synonyms.size()]));
+         }
+       }
+     
+       return word2Syns;
+     }
+   
+     /**
+      * Re-keys the index so that every key and every synonym is a substring of
+      * one large string holding all words.
+      * NOTE(review): this only saves memory on JDKs where String.substring
+      * shares the backing array - confirm it is still worthwhile.
+      */
+     private HashMap<String,String[]> optimize(HashMap<String,String[]> word2Syns, HashMap<String,String> internedWords) {
+       if (DEBUG) {
+         System.err.println("before gc");
+         for (int i=0; i < 10; i++) System.gc();
+         System.err.println("after gc");
+       }
+       
+       // collect entries
+       int len = 0;
+       int size = word2Syns.size();
+       String[][] allSynonyms = new String[size][];
+       String[] words = new String[size];
+       Iterator<Map.Entry<String,String[]>> iter = word2Syns.entrySet().iterator();
+       for (int j=0; j < size; j++) {
+         Map.Entry<String,String[]> entry = iter.next();
+         allSynonyms[j] = entry.getValue(); 
+         words[j] = entry.getKey();
+         len += words[j].length();
+       }
+       
+       // assemble large string containing all words
+       StringBuilder buf = new StringBuilder(len);
+       for (int j=0; j < size; j++) buf.append(words[j]);
+       String allWords = new String(buf.toString()); // ensure compact string across JDK versions
+       buf = null;
+       
+       // intern words at app level via memory-overlaid substrings
+       for (int p=0, j=0; j < size; j++) {
+         String word = words[j];
+         internedWords.put(word, allWords.substring(p, p + word.length()));
+         p += word.length();
+       }
+       
+       // replace words with interned words
+       for (int j=0; j < size; j++) {
+         String[] syns = allSynonyms[j];
+         for (int k=syns.length; --k >= 0; ) {
+           syns[k] = internedWords.get(syns[k]);
+         }
+         // remove first so that the new key object (not only the value) is stored
+         word2Syns.remove(words[j]);
+         word2Syns.put(internedWords.get(words[j]), syns);
+       }
+       
+       if (DEBUG) {
+         words = null;
+         allSynonyms = null;
+         internedWords = null;
+         allWords = null;
+         System.err.println("before gc");
+         for (int i=0; i < 10; i++) System.gc();
+         System.err.println("after gc");
+       }
+       return word2Syns;
+     }
+     
+     // the following utility methods below are copied from Apache style Nux library - see http://dsd.lbl.gov/nux
+     /**
+      * Reads the stream to its end, returns the bytes read and closes the stream
+      * (also when reading fails).
+      */
+     private static byte[] toByteArray(InputStream input) throws IOException {
+       try {
+         // safe and fast even if input.available() behaves weird or buggy
+         int len = Math.max(256, input.available());
+         byte[] buffer = new byte[len];
+         byte[] output = new byte[len];
+         
+         len = 0;
+         int n;
+         while ((n = input.read(buffer)) >= 0) {
+           if (len + n > output.length) { // grow capacity
+             byte tmp[] = new byte[Math.max(output.length << 1, len + n)];
+             System.arraycopy(output, 0, tmp, 0, len);
+             System.arraycopy(buffer, 0, tmp, len, n);
+             buffer = output; // use larger buffer for future larger bulk reads
+             output = tmp;
+           } else {
+             System.arraycopy(buffer, 0, output, len, n);
+           }
+           len += n;
+         }
+   
+         if (len == output.length) return output;
+         buffer = null; // help gc
+         buffer = new byte[len]; // trim to the number of bytes actually read
+         System.arraycopy(output, 0, buffer, 0, len);
+         return buffer;
+       } finally {
+         if (input != null) input.close();
+       }
+     }
+     
+   }
\ No newline at end of file

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/TopJWNLDictionary.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,164 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import opennlp.tools.coref.mention.Dictionary;
+
+import net.didion.jwnl.JWNL;
+import net.didion.jwnl.JWNLException;
+import net.didion.jwnl.data.Adjective;
+import net.didion.jwnl.data.IndexWord;
+import net.didion.jwnl.data.POS;
+import net.didion.jwnl.data.Pointer;
+import net.didion.jwnl.data.PointerType;
+import net.didion.jwnl.data.Synset;
+import net.didion.jwnl.data.VerbFrame;
+import net.didion.jwnl.dictionary.MapBackedDictionary;
+import net.didion.jwnl.dictionary.MorphologicalProcessor;
+import net.didion.jwnl.dictionary.morph.DefaultMorphologicalProcessor;
+import net.didion.jwnl.dictionary.morph.DetachSuffixesOperation;
+import net.didion.jwnl.dictionary.morph.LookupExceptionsOperation;
+import net.didion.jwnl.dictionary.morph.LookupIndexWordOperation;
+import net.didion.jwnl.dictionary.morph.Operation;
+import net.didion.jwnl.dictionary.morph.TokenizerOperation;
+import net.didion.jwnl.princeton.file.PrincetonObjectDictionaryFile;
+
+/**
+ * An implementation of the Dictionary interface using the JWNL library.
+ * <p>
+ * Lemma lookup honours the part-of-speech tag supplied by the caller. The
+ * sense-related lookups ({@link #getSenseKey}, {@link #getNumSenses} and
+ * {@link #getParentSenseKeys}) accept a part-of-speech argument but always
+ * query the noun index.
+ */
+public class TopJWNLDictionary implements Dictionary {
+
+	/** Fallback used by {@link #main(String[])} when WNSEARCHDIR is not set. */
+	private static final String DEFAULT_SEARCH_DIR = "models/WordNet_2.1";
+	private static final String[] empty = new String[0];
+
+	private net.didion.jwnl.dictionary.Dictionary dict;
+	private MorphologicalProcessor morphy;
+
+	/**
+	 * Initializes JWNL from a properties resource and obtains the dictionary
+	 * and its morphological processor.
+	 *
+	 * @param propertiesFile
+	 *            name of the JWNL properties resource, resolved with
+	 *            {@link Class#getResourceAsStream(String)} relative to this class
+	 * @throws IOException
+	 *             if the resource cannot be found or closing it fails
+	 * @throws JWNLException
+	 *             if JWNL fails to initialize
+	 */
+	public TopJWNLDictionary(String propertiesFile) throws IOException,
+			JWNLException {
+		java.io.InputStream properties = (propertiesFile == null) ? null
+				: this.getClass().getResourceAsStream(propertiesFile);
+		if (properties == null) {
+			// fail with a clear message instead of handing a null stream to JWNL
+			throw new IOException("JWNL properties resource not found: "
+					+ propertiesFile);
+		}
+		try {
+			JWNL.initialize(properties);
+		} finally {
+			properties.close();
+		}
+		dict = net.didion.jwnl.dictionary.Dictionary.getInstance();
+		morphy = dict.getMorphologicalProcessor();
+	}
+
+	/**
+	 * Maps the first letter of a tag to a WordNet part of speech: N/n noun,
+	 * V/v verb, J/a adjective, R/r adverb. Anything else, including a null
+	 * tag, is treated as a noun.
+	 */
+	private static POS toPos(String tag) {
+		if (tag == null) {
+			return POS.NOUN;
+		}
+		if (tag.startsWith("N") || tag.startsWith("n")) {
+			return POS.NOUN;
+		} else if (tag.startsWith("V") || tag.startsWith("v")) {
+			return POS.VERB;
+		} else if (tag.startsWith("J") || tag.startsWith("a")) {
+			return POS.ADJECTIVE;
+		} else if (tag.startsWith("R") || tag.startsWith("r")) {
+			return POS.ADVERB;
+		}
+		return POS.NOUN;
+	}
+
+	/**
+	 * Returns all base forms of a word for the part of speech denoted by the
+	 * tag.
+	 *
+	 * @param word
+	 *            the surface form to look up
+	 * @param tag
+	 *            part-of-speech tag; only its first letter is inspected
+	 * @return the base forms, or null if the lookup raised a JWNLException
+	 */
+	@SuppressWarnings("unchecked")
+	public String[] getLemmas(String word, String tag) {
+		try {
+			// lookupAllBaseForms returns a raw List, hence the suppression
+			List<String> lemmas = morphy.lookupAllBaseForms(toPos(tag), word);
+			return lemmas.toArray(new String[lemmas.size()]);
+		} catch (JWNLException e) {
+			e.printStackTrace();
+			return null;
+		}
+	}
+
+	/**
+	 * Returns the synset offset of the given sense of a noun lemma as a string.
+	 *
+	 * @param lemma
+	 *            the lemma to look up in the noun index
+	 * @param pos
+	 *            ignored; the noun index is always used
+	 * @param sense
+	 *            zero-based index into the lemma's synset offsets
+	 * @return the offset as a string, or null if the lemma is unknown or the
+	 *         lookup raised a JWNLException
+	 */
+	public String getSenseKey(String lemma, String pos, int sense) {
+		try {
+			IndexWord iw = dict.getIndexWord(POS.NOUN, lemma);
+			if (iw == null) {
+				return null;
+			}
+			return String.valueOf(iw.getSynsetOffsets()[sense]);
+		} catch (JWNLException e) {
+			e.printStackTrace();
+			return null;
+		}
+
+	}
+
+	/**
+	 * Returns the number of senses of a noun lemma.
+	 *
+	 * @param lemma
+	 *            the lemma to look up in the noun index
+	 * @param pos
+	 *            ignored; the noun index is always used
+	 * @return the sense count, or 0 if the lemma is unknown or the lookup
+	 *         raised a JWNLException
+	 */
+	public int getNumSenses(String lemma, String pos) {
+		try {
+			IndexWord iw = dict.getIndexWord(POS.NOUN, lemma);
+			if (iw == null) {
+				return 0;
+			}
+			return iw.getSenseCount();
+		} catch (JWNLException e) {
+			return 0;
+		}
+	}
+
+	/**
+	 * Recursively follows the HYPERNYM pointers of a synset and appends the
+	 * offset of every synset reached to the given list.
+	 */
+	private void getParents(Synset synset, List<String> parents)
+			throws JWNLException {
+		Pointer[] pointers = synset.getPointers();
+		for (int pi = 0, pn = pointers.length; pi < pn; pi++) {
+			if (pointers[pi].getType() == PointerType.HYPERNYM) {
+				Synset parent = pointers[pi].getTargetSynset();
+				parents.add(String.valueOf(parent.getOffset()));
+				getParents(parent, parents);
+			}
+		}
+	}
+
+	/**
+	 * Returns the offsets of all hypernym ancestors of one sense of a noun
+	 * lemma.
+	 *
+	 * @param lemma
+	 *            the lemma to look up in the noun index
+	 * @param pos
+	 *            ignored; the noun index is always used
+	 * @param sense
+	 *            zero-based sense index (JWNL senses are requested as
+	 *            sense + 1)
+	 * @return the ancestor offsets, an empty array if the lemma is unknown, or
+	 *         null if the lookup raised a JWNLException
+	 */
+	public String[] getParentSenseKeys(String lemma, String pos, int sense) {
+		try {
+			IndexWord iw = dict.getIndexWord(POS.NOUN, lemma);
+			if (iw != null) {
+				Synset synset = iw.getSense(sense + 1);
+				List<String> parents = new ArrayList<String>();
+				getParents(synset, parents);
+				return parents.toArray(new String[parents.size()]);
+			} else {
+				return empty;
+			}
+		} catch (JWNLException e) {
+			e.printStackTrace();
+			return null;
+		}
+	}
+
+	/**
+	 * Small demo: prints the hypernym offsets for every sense of every lemma
+	 * of the word "test". The constructor argument is taken from the
+	 * WNSEARCHDIR system property, falling back to a built-in default when the
+	 * property is not set.
+	 */
+	public static void main(String[] args) throws IOException, JWNLException {
+		String searchDir = System.getProperty("WNSEARCHDIR");
+		System.err.println("searchDir=" + searchDir);
+		if (searchDir == null) {
+			searchDir = DEFAULT_SEARCH_DIR;
+		}
+		Dictionary dict = new TopJWNLDictionary(searchDir);
+		String[] lemmas = dict.getLemmas("test", "NN");
+		if (lemmas == null) {
+			// lookup failed; the stack trace has already been printed
+			return;
+		}
+		for (int li = 0, ln = lemmas.length; li < ln; li++) {
+			for (int si = 0, sn = dict.getNumSenses(lemmas[li], "NN"); si < sn; si++) {
+				String[] parents = dict.getParentSenseKeys(lemmas[li], "NN", si);
+				System.out.println(lemmas[li]
+						+ " ("
+						+ si
+						+ ")\t"
+						+ java.util.Arrays.asList(parents == null ? empty
+								: parents));
+			}
+		}
+	}
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/relevanceVocabs/WordDictionary.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,137 @@
+package opennlp.tools.apps.relevanceVocabs;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import opennlp.tools.coref.mention.Dictionary;
+
+/**
+ * Singleton that maps words to their lemmas, consulting a small table of
+ * special cases first and a WordNet-backed {@link Dictionary} otherwise.
+ */
+public class WordDictionary {
+	private static final String[][] SPECIAL_CASES = { { "lens", "lenses" } };
+
+	// NOTE(review): the constructor passes the literal text "PROPERTIES_FILE" as
+	// the resource name. The code that used to read the real path from the NLP
+	// configuration (key "wordnet.propertites.file") is disabled, so loading
+	// presumably fails unless a resource with that exact name exists - TODO confirm.
+	private static WordDictionary instance;
+
+	private Dictionary dictionary;
+	private Map<String, String> specialCaseMap;
+
+	/**
+	 * Returns the shared instance, creating it on first use.
+	 */
+	public static synchronized WordDictionary getInstance() {
+		if (instance == null)
+			instance = new WordDictionary();
+
+		return instance;
+	}
+
+	private WordDictionary() {
+		// initialize the dictionary by loading the WordNet database
+		try {
+			dictionary = new TopJWNLDictionary("PROPERTIES_FILE");
+		} catch (Exception e) {
+			// best effort: the instance stays usable for special cases only
+			e.printStackTrace();
+			System.err.println("Failed to load the WordNet database: " + e);
+		}
+
+		// build the dictionary for special cases
+		specialCaseMap = buildSpecialCaseMap();
+	}
+
+	/**
+	 * Returns the lemma of a word for the given part-of-speech type, or the
+	 * trimmed, lower-cased word itself when no lemma is found.
+	 *
+	 * @param word
+	 *            the word to look up; may be null
+	 * @param type
+	 *            part-of-speech tag; may be null
+	 * @return the lemma or normalized word, or null if word is null
+	 */
+	public String getLemmaOrWord(String word, String type) {
+		String lemma = getLemma(word, type);
+		if (lemma != null)
+			return lemma;
+		else
+			return (word == null) ? null : word.trim().toLowerCase();
+	}
+
+	/**
+	 * Returns the lemma of a word for the given part-of-speech type.
+	 * <p>
+	 * Words of 20 or more characters are returned unchanged without any
+	 * lookup. Otherwise the word is trimmed and lower-cased, checked against
+	 * the special-case table, and finally looked up in the dictionary; the
+	 * first lemma reported by the dictionary wins.
+	 *
+	 * @param word
+	 *            the word to look up; may be null
+	 * @param type
+	 *            part-of-speech tag; may be null
+	 * @return the lemma, or null if the word is null or blank, no lemma is
+	 *         found, or the WordNet dictionary could not be loaded
+	 */
+	public String getLemma(String word, String type) {
+		if (word == null)
+			return null;
+		// skip some long word,avoid dictionary getLemmas dead
+		if (word.length() >= 20)
+			return word;
+		word = word.trim().toLowerCase();
+		if (word.length() == 0)
+			return null;
+
+		// check special cases first
+		String lemma = specialCaseMap.get(word);
+		if (lemma != null)
+			return lemma;
+
+		// the constructor swallows load failures, so the dictionary may be
+		// missing; report "no lemma" instead of failing with an NPE
+		if (dictionary == null)
+			return null;
+
+		// use the dictionary for general cases
+		// JWNLDictionary has a bug, and we have to use lower case type
+		type = (type == null) ? null : type.toLowerCase();
+		String[] lemmas = dictionary.getLemmas(word, type);
+		if (lemmas == null || lemmas.length == 0)
+			return null;
+
+		return lemmas[0];
+	}
+
+	/**
+	 * Returns the lemma of a word whose part of speech is unknown. The word is
+	 * tried as a noun first, then as a verb; if neither lookup yields a lemma
+	 * the trimmed, lower-cased word is returned.
+	 *
+	 * @param word
+	 *            the word to look up; may be null
+	 * @return the lemma or normalized word, or null if word is null
+	 */
+	public String getLemmaOrWord(String word) {
+		if (word == null)
+			return null;
+
+		// try noun first
+		String lemma = getLemma(word, "NN");
+		if (lemma != null)
+			return lemma;
+
+		// then try verb
+		lemma = getLemma(word, "VB");
+		if (lemma != null)
+			return lemma;
+
+		// return word now
+		return word.trim().toLowerCase();
+	}
+
+	/**
+	 * Builds the special-case table: every entry of a SPECIAL_CASES row maps
+	 * to the first entry of that row.
+	 */
+	private Map<String, String> buildSpecialCaseMap() {
+
+		Map<String, String> specialCaseMap = new HashMap<String, String>();
+		for (String[] wordList : SPECIAL_CASES) {
+			String lemma = wordList[0];
+			for (String word : wordList) {
+				specialCaseMap.put(word, lemma);
+			}
+		}
+
+		return specialCaseMap;
+	}
+
+	/** Prints the lemmas of a few sample words. */
+	public static void main(String[] args) {
+		String[] verbs = { "is", "has", "were", "likes", "TaKen", "going" };
+		String[] nouns = { "efficient", "Cars", "lens", "wives", "lenses",
+				"photos" };
+		String[] adverbs = { "would", "could", "should", "might" };
+		WordDictionary dictionary = WordDictionary.getInstance();
+
+		for (String word : verbs) {
+			System.out
+					.println(word + " ==> " + dictionary.getLemma(word, "VB"));
+		}
+		for (String word : nouns) {
+			System.out
+					.println(word + " ==> " + dictionary.getLemma(word, "NN"));
+		}
+		for (String word : adverbs) {
+			System.out
+					.println(word + " ==> " + dictionary.getLemma(word, "JJ"));
+		}
+	}
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/BingAPIProductSearchManager.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,68 @@
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.similarity.apps.BingQueryRunner;
+import opennlp.tools.similarity.apps.HitBase;
+
+import org.apache.commons.lang.StringUtils;
+
+/**
+ * Finds product pages through Bing web search.
+ */
+public class BingAPIProductSearchManager {
+	BingQueryRunner search = new BingQueryRunner();
+
+	/**
+	 * Searches amazon.com for review pages of a product. The search always
+	 * requests 10 hits; at most the first {@code count} of them are examined,
+	 * and those whose URL contains "amazon.com" are returned.
+	 *
+	 * @param name
+	 *            product name used in the query
+	 * @param count
+	 *            maximum number of hits to examine
+	 * @return the accepted hits, possibly empty
+	 */
+	public List<HitBase> findProductByName(String name, int count){
+		List<HitBase> foundFBPages = search.runSearch("site:amazon.com"+" "+name + " reviews"
+				, 10);
+		List<HitBase> results = new ArrayList<HitBase>();
+		if (foundFBPages == null)
+			return results;
+		int ct=0;
+		for(HitBase h: foundFBPages){
+			if (ct>=count) break;
+			ct++;
+			String url = h.getUrl();
+			if (url == null || url.indexOf("amazon.com")<0)
+				continue;
+			// title-overlap filtering is intentionally not applied here
+			results.add(h);
+		}
+		return results;
+	}
+
+	/**
+	 * Searches the web for a product and keeps the hits whose title contains
+	 * enough of the words of the product name. At most the first
+	 * {@code count} hits are examined.
+	 *
+	 * @param name
+	 *            product name used as the query and for title matching
+	 * @param count
+	 *            number of hits to request and maximum number to examine
+	 * @return the accepted hits, possibly empty
+	 */
+	public List<HitBase> findProductByNameNoReview(String name, int count){
+		List<HitBase> foundFBPages = search.runSearch(name, count);
+		List<HitBase> results = new ArrayList<HitBase>();
+		if (foundFBPages == null || foundFBPages.isEmpty())
+			return results;
+		// the query words do not change between hits, so split them once
+		String[] merchantWords = name.toLowerCase().split(" ");
+		int ct=0;
+		for(HitBase h: foundFBPages){
+			if (ct>=count) break;
+			ct++;
+			if (h.getTitle() == null)
+				continue;
+			String title = h.getTitle().toLowerCase();
+			int overlapCount=0;
+			for(String commonWord:merchantWords){
+				if (title.indexOf(commonWord+" ")>-1 || title.indexOf(" "+commonWord)>-1){
+					overlapCount++;
+					System.out.println(" found word "+ commonWord + " in title = "+title);
+				}
+			}
+			float coverage = (float)overlapCount/(float) (merchantWords.length);
+			// The original test was
+			// coverage>0.4 || (coverage>0.5f && merchantWords.length <4);
+			// its second clause can only be true when the first already is,
+			// so the test reduces to the first clause.
+			if (coverage>0.4)
+				results.add(h);
+		}
+		return results;
+	}
+
+	/** Runs a sample product search and prints the hits. */
+	public static void main(String[] args){
+		BingAPIProductSearchManager man = new BingAPIProductSearchManager ();
+		List<HitBase> res = man.findProductByName("chain saw", 5);
+		System.out.println(res);
+	}
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/FBOpenGraphSearchManager.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,143 @@
+package opennlp.tools.apps.review_builder;
+
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.List;
+import org.apache.commons.lang.StringUtils;
+
+import opennlp.tools.jsmlearning.ProfileReaderWriter;
+import opennlp.tools.similarity.apps.utils.PageFetcher;
+import com.restfb.Connection;
+import com.restfb.DefaultFacebookClient;
+import com.restfb.FacebookClient;
+import com.restfb.Parameter;
+import com.restfb.exception.FacebookException;
+import com.restfb.types.Event;
+import com.restfb.types.Page;
+
+
+/**
+ * Queries the Facebook graph search for events and pages, retrying failed
+ * requests a fixed number of times.
+ */
+public class FBOpenGraphSearchManager {
+
+	public List<String[]> profiles = null;
+	protected FacebookClient mFBClient;
+	protected PageFetcher pageFetcher = new PageFetcher();
+	protected static final int NUM_TRIES = 5;
+	protected static final long WAIT_BTW_TRIES=1000; //milliseconds between re-tries
+
+	/**
+	 * Loads the profiles from a CSV file.
+	 */
+	public FBOpenGraphSearchManager(){
+		// NOTE(review): hard-coded absolute Windows path; presumably only valid
+		// on the original author's machine - TODO make configurable.
+		profiles = ProfileReaderWriter.readProfiles("C:\\nc\\features\\analytics\\dealanalyzer\\sweetjack-localcoupon-may12012tooct302012.csv");
+	}
+
+	/**
+	 * Sets the client used for all Facebook requests. Must be called before
+	 * any of the search methods.
+	 */
+	public void setFacebookClient(FacebookClient c){
+		this.mFBClient=c;
+	}
+
+	/**
+	 * Runs a graph "search" request for up to 100 objects, retrying up to
+	 * NUM_TRIES times with WAIT_BTW_TRIES milliseconds between attempts when
+	 * the request fails with a FacebookException. If the thread is interrupted
+	 * while waiting, the interrupt flag is restored and retrying stops.
+	 *
+	 * @return the data of the first successful request, or an empty list if
+	 *         every attempt failed
+	 */
+	private <T> List<T> searchWithRetries(Class<T> resultType, String query,
+			String fbType, String logPrefix) {
+		for (int i = 0; i < NUM_TRIES; i++) {
+			try {
+				Connection<T> publicSearch =
+						mFBClient.fetchConnection("search", resultType,
+								Parameter.with("q", query), Parameter.with("type", fbType),Parameter.with("limit", 100));
+				System.out.println(logPrefix + query);
+				List<T> data = publicSearch.getData();
+				return (data == null) ? new ArrayList<T>() : data;
+			} catch (FacebookException e) {
+				System.out.println("FBError "+e);
+				try {
+					Thread.sleep(WAIT_BTW_TRIES);
+				} catch (InterruptedException e1) {
+					System.out.println("Error "+e1);
+					// keep the interrupt visible to callers and stop retrying
+					Thread.currentThread().interrupt();
+					break;
+				}
+			}
+		}
+		return new ArrayList<T>();
+	}
+
+	/**
+	 * Searches Facebook events by name.
+	 *
+	 * @param event
+	 *            the query string
+	 * @return the events found, or an empty list if every attempt failed
+	 */
+	public List<Event> getFBEventsByName(String event)
+	{
+		return searchWithRetries(Event.class, event, "event", "Searching FB events for ");
+	}
+
+	/**
+	 * Returns the number of likes of the first Facebook page matching the
+	 * given merchant name that reports a positive like count. If no page
+	 * reports likes directly, the HTML of each page is fetched and the like
+	 * count is scraped from it.
+	 *
+	 * @param merchant
+	 *            the query string
+	 * @return the like count, or null if none could be determined
+	 */
+	public Long getFBPageLikes(String merchant)
+	{
+		List<Page> groups = searchWithRetries(Page.class, merchant, "page", "Searching FB Pages for ");
+
+		for (Page p: groups){
+			if (p!=null && p.getLikes()!=null && p.getLikes()>0)
+				return p.getLikes();
+		}
+
+		// fall back to scraping markup of the form: stats fwb">235</span>
+		for (Page p: groups){
+			if (p==null || p.getId()==null)
+				continue;
+			String content = pageFetcher.fetchOrigHTML("http://www.facebook.com/"+p.getId());
+
+			String likes = StringUtils.substringBetween(content, "stats fwb\">", "<" );
+			if (likes==null)
+				continue;
+			int nLikes;
+			try {
+				nLikes = Integer.parseInt(likes);
+			} catch (NumberFormatException ignored){
+				// the scraped text is not a plain integer; try the next page
+				continue;
+			}
+			if (nLikes>0){
+				return Long.valueOf(nLikes);
+			}
+		}
+
+		return null;
+	}
+
+	/** Looks up the likes for a sample query and prints the result. */
+	public static void main(String[] args){
+		FBOpenGraphSearchManager man = new FBOpenGraphSearchManager ();
+		man.setFacebookClient(new DefaultFacebookClient());
+
+		// getFBPageLikes may return null, so do not unbox into a primitive
+		Long res = man.getFBPageLikes("chain saw");
+		System.out.println(res);
+	}
+}

Added: opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java?rev=1555944&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java (added)
+++ opennlp/sandbox/opennlp-similarity/src/main/java/opennlp/tools/apps/review_builder/MachineTranslationWrapper.java Mon Jan  6 17:48:30 2014
@@ -0,0 +1,86 @@
+package opennlp.tools.apps.review_builder;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.URLDecoder;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.lang.StringUtils;
+import org.json.JSONArray;
+import org.json.JSONException;
+import org.json.JSONObject;
+
+/**
+ * Thin client for the MyMemory translation web service, also used to
+ * re-phrase English sentences by translating them to Spanish and back.
+ */
+public class MachineTranslationWrapper  {
+	private String translatorURL = "http://mymemory.translated.net/api/get?q=";
+
+	/**
+	 * Translates a sentence by calling the translation service.
+	 *
+	 * @param sentence
+	 *            the text to translate; may be null
+	 * @param lang2lang
+	 *            language pair as expected by the service, e.g. "en|es"
+	 * @return the translated text, or null if the sentence is null, the
+	 *         request fails, or the response cannot be parsed
+	 */
+	public String translate(String sentence, String lang2lang){
+		if (sentence==null)
+			return null;
+		BufferedReader reader = null;
+		try {
+			// URL-encode the text so that characters such as '&', '#' or '+'
+			// cannot corrupt the query string (spaces still become '+')
+			String request = translatorURL + java.net.URLEncoder.encode(sentence, "UTF-8")
+					+ "&langpair="+lang2lang;//"en|es";
+			URL urlC = new URL(request);
+			URLConnection connection = urlC.openConnection();
+
+			// NOTE(review): the response is assumed to be UTF-8 encoded JSON
+			reader = new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8"));
+			StringBuilder result = new StringBuilder();
+			String line;
+			while ((line = reader.readLine()) != null)
+			{
+				result.append(line);
+			}
+			JSONObject rootObject = new JSONObject(result.toString());
+			JSONObject  findObject = rootObject.getJSONObject("responseData");
+			String transl = findObject.getString("translatedText");
+			try {
+				transl = URLDecoder.decode(transl, "UTF-8");
+			} catch (IllegalArgumentException ignored) {
+				// best effort: text that is not valid URL-encoding is kept as is
+			}
+
+			return transl;
+
+		} catch (MalformedURLException e) {
+			e.printStackTrace();
+			return null;
+		} catch (JSONException e) {
+			e.printStackTrace();
+			return null;
+		} catch (IOException e) {
+			e.printStackTrace();
+			return null;
+		} finally {
+			if (reader != null) {
+				try {
+					reader.close();
+				} catch (IOException ignored) {
+					// nothing useful can be done if closing fails
+				}
+			}
+		}
+
+	}
+
+	/**
+	 * Re-phrases an English sentence by translating it to Spanish and back.
+	 *
+	 * @param sentence
+	 *            the sentence to re-phrase
+	 * @return the back-translated sentence, or the original sentence if either
+	 *         translation failed or the service answered with one of its
+	 *         error/warning messages
+	 */
+	public String rePhrase(String sentence){
+		System.out.println("orig = "+ sentence);
+		String transl = translate(sentence, "en|es");
+		System.out.println("tranls = "+transl);
+		String inverseTransl = translate(transl, "es|en");
+		if (inverseTransl == null)
+			return sentence; // a translation step failed; keep the input
+		if (!(inverseTransl.indexOf("NO QUERY SPECIFIED")>-1) && !(inverseTransl.indexOf("INVALID LANGUAGE")>-1) && !(inverseTransl.indexOf("MYMEMORY WARNING")>-1))
+			return inverseTransl;
+		else
+			return sentence;
+	}
+
+	/** Translates and re-phrases a sample sentence and prints the results. */
+	public static void main(String[] args){
+		MachineTranslationWrapper rePhraser = new MachineTranslationWrapper();
+
+		System.out.println(rePhraser.translate("I went to the nearest bookstore to buy a book written by my friend and his aunt", "en|ru"));
+
+		System.out.println(rePhraser.rePhrase("I went to the nearest bookstore to buy a book written by my friend and his aunt"));
+
+	}
+
+}