You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2015/06/24 22:19:05 UTC

svn commit: r1687358 - in /opennlp/sandbox/opennlp-wsd: ./ src/ src/main/ src/main/java/ src/main/java/opennlp/ src/main/java/opennlp/tools/ src/main/java/opennlp/tools/disambiguator/ src/main/java/opennlp/tools/disambiguator/ims/ src/main/java/opennlp...

Author: joern
Date: Wed Jun 24 20:19:05 2015
New Revision: 1687358

URL: http://svn.apache.org/r1687358
Log:
Added initial version of the wsd component. Thanks to Anthony Beylerian and Mondher Bouazizi for the contribution.

Added:
    opennlp/sandbox/opennlp-wsd/
    opennlp/sandbox/opennlp-wsd/src/
    opennlp/sandbox/opennlp-wsd/src/main/
    opennlp/sandbox/opennlp-wsd/src/main/java/
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java
    opennlp/sandbox/opennlp-wsd/src/test/
    opennlp/sandbox/opennlp-wsd/src/test/java/
    opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/
    opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/
    opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/
    opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,134 @@
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+import net.sf.extjwnl.data.POS;
+
+
+public class Constants {
+	
+	public static String osPathChar = "\\";
+
+	// List of all the PoS tags
+	public static String[] allPOS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ",
+			"JJR", "JJS", "LS", "MD", "NN", "NNS", "NNP", "NNPS", "PDT", "POS",
+			"PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH", "VB",
+			"VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB" };
+
+	// List of the PoS tags of which the senses are to be extracted
+	public static String[] relevantPOS = { "JJ", "JJR", "JJS", "NN", "NNS", "RB", "RBR", "RBS", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ" };
+
+	
+	// List of Negation Words
+	public static ArrayList<String> negationWords = new ArrayList<String>(
+			Arrays.asList("not", "no", "never", "none", "nor", "non"));
+		
+	// List of Stop Words
+	public static ArrayList<String> stopWords = new ArrayList<String>(Arrays.asList( "a", "able", "about", "above", "according", "accordingly", "across", "actually", "after",
+		"afterwards", "again", "against", "ain't", "all", "allow", "allows", "almost", "alone", "along", "already", "also",
+		"although", "always", "am", "among", "amongst", "an", "and", "another", "any", "anybody", "anyhow", "anyone", "anything",
+		"anyway", "anyways", "anywhere", "apart", "appear", "appreciate", "appropriate", "are", "aren't", "around", "as", "aside", "ask",
+		"asking", "associated", "at", "available", "away", "awfully", "be", "became", "because", "become", "becomes", "becoming", "been",
+		"before", "beforehand", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "both",
+		"brief", "but", "by", "came", "can", "cannot", "cant", "can't", "cause", "causes", "certain", "certainly", "changes", "clearly",
+		"c'mon", "co", "com", "come", "comes", "concerning", "consequently", "consider", "considering", "contain", "containing",
+		"contains", "corresponding", "could", "couldn't", "course", "c's", "currently", "definitely", "described", "despite", "did", "didn't",
+		"different", "do", "does", "doesn't", "doing", "done", "don't", "down", "downwards", "during", "each", "edu", "eg", "eight",
+		"either", "else", "elsewhere", "enough", "entirely", "especially", "et", "etc", "even", "ever", "every", "everybody", "everyone",
+		"everything", "everywhere", "ex", "exactly", "example", "except", "far", "few", "fifth", "first", "five", "followed", "following",
+		"follows", "for", "former", "formerly", "forth", "four", "from", "further", "furthermore", "get", "gets", "getting", "given",
+		"gives", "go", "goes", "going", "gone", "got", "gotten", "greetings", "had", "hadn't", "happens", "hardly", "has", "hasn't",
+		"have", "haven't", "having", "he", "hello", "help", "hence", "her", "here", "hereafter", "hereby", "herein", "here's", "hereupon",
+		"hers", "herself", "he's", "hi", "him", "himself", "his", "hither", "hopefully", "how", "howbeit", "however", "i", "i'd", "ie", "if",
+		"ignored", "i'll", "i'm", "immediate", "in", "inasmuch", "inc", "indeed", "indicate", "indicated", "indicates", "inner", "insofar",
+		"instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll", "its", "it's", "itself", "i've", "just", "keep", "keeps", "kept",
+		"know", "known", "knows", "last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "let's", "like",
+		"liked", "likely", "little", "look", "looking", "looks", "ltd", "mainly", "many", "may", "maybe", "me", "mean", "meanwhile",
+		"merely", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "name", "namely", "nd", "near", "nearly",
+		"necessary", "need", "needs", "neither", "never", "nevertheless", "new", "next", "nine", "no", "nobody", "non", "none", "noone",
+		"nor", "normally", "not", "nothing", "novel", "now", "nowhere", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "on",
+		"once", "one", "ones", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside",
+		"over", "overall", "own", "particular", "particularly", "per", "perhaps", "placed", "please", "plus", "possible", "presumably",
+		"probably", "provides", "que", "quite", "qv", "rather", "rd", "re", "really", "reasonably", "regarding", "regardless", "regards",
+		"relatively", "respectively", "right", "said", "same", "saw", "say", "saying", "says", "second", "secondly", "see", "seeing",
+		"seem", "seemed", "seeming", "seems", "seen", "self", "selves",	"sensible", "sent", "serious", "seriously", "seven", "several",
+		"shall", "she", "should", "shouldn't", "since", "six", "so", "some", "somebody", "somehow", "someone", "something", "sometime",
+		"sometimes", "somewhat", "somewhere", "soon", "sorry", "specified",	"specify", "specifying", "still", "sub", "such", "sup", "sure",
+		"take", "taken", "tell", "tends", "th", "than", "thank", "thanks", "thanx", "that", "thats", "that's", "the", "their", "theirs",
+		"them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "theres", "there's",
+		"thereupon", "these", "they", "they'd", "they'll", "they're", "they've", "think", "third", "this", "thorough", "thoroughly",
+		"those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "took", "toward", "towards",
+		"tried", "tries", "truly", "try", "trying", "t's", "twice", "two", "un", "under", "unfortunately", "unless", "unlikely", "until",
+		"unto", "up", "upon", "us", "use", "used", "useful", "uses", "using", "usually", "value", "various", "very", "via", "viz", "vs",
+		"want", "wants", "was", "wasn't", "way", "we", "we'd", "welcome", "well", "we'll", "went", "were", "we're", "weren't", "we've",
+		"what", "whatever", "what's", "when", "whence", "whenever",	"where", "whereafter", "whereas", "whereby", "wherein", "where's",
+		"whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "who's", "whose", "why", "will",
+		"willing", "wish", "with", "within", "without", "wonder", "won't", "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll",
+		"your", "you're", "yours", "yourself", "yourselves", "you've", "zero"));
+		
+	// Print a text in the console
+	public static void print(Object in) {
+		System.out.println(in);
+	}
+
+	public static void print(Object[] array) {
+		System.out.println(Arrays.asList(array));
+	}
+
+	public static void print(Object[][] array) {
+		System.out.print("[");
+		for (int i = 0; i < array.length; i++) {
+			print(array[i]);
+			if (i != array.length - 1) {
+				System.out.print("\n");
+			}
+			print("]");
+		}
+	}
+
+	// return the PoS (Class POS) out of the PoS-tag
+	public static POS getPOS(String posTag) {
+
+		ArrayList<String> adjective = new ArrayList<String>(Arrays.asList("JJ", "JJR", "JJS"));
+		ArrayList<String> adverb = new ArrayList<String>(Arrays.asList("RB", "RBR", "RBS"));
+		ArrayList<String> noun = new ArrayList<String>(Arrays.asList("NN", "NNS", "NNP", "NNPS"));
+		ArrayList<String> verb = new ArrayList<String>(Arrays.asList("VB", "VBD", "VBG", "VBN", "VBP", "VBZ"));
+
+		if (adjective.contains(posTag)) return POS.ADJECTIVE;
+		else if (adverb.contains(posTag)) return POS.ADVERB;
+		else if (noun.contains(posTag)) return POS.NOUN;
+		else if (verb.contains(posTag)) return POS.VERB;
+		else return null;
+
+	}
+	
+	// Check whether a list of arrays contains an array
+	public static boolean belongsTo(String[] array, ArrayList<String[]> fullList) {
+		for (String[] refArray : fullList) {
+			if (areStringArraysEqual(array, refArray))
+				return true;
+		}
+		return false;
+	}
+
+	// Check whether two arrays of strings are equal
+	public static boolean areStringArraysEqual(String[] array1, String[] array2) {
+
+		if (array1.equals(null) || array2.equals(null))
+			return false;
+
+		if (array1.length != array2.length) {
+			return false;
+		}
+		for (int i = 0; i < array1.length; i++) {
+			if (!array1[i].equals(array2[i])) {
+				return false;
+			}
+		}
+
+		return true;
+
+	}
+	
+}

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,227 @@
+package opennlp.tools.disambiguator;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.HashMap;
+
+import net.sf.extjwnl.JWNLException;
+import net.sf.extjwnl.data.POS;
+import net.sf.extjwnl.dictionary.Dictionary;
+import net.sf.extjwnl.dictionary.MorphologicalProcessor;
+import opennlp.tools.cmdline.postag.POSModelLoader;
+import opennlp.tools.lemmatizer.SimpleLemmatizer;
+import opennlp.tools.ml.model.MaxentModel;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSTaggerME;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.InvalidFormatException;
+
+public class Loader {
+
+	private static String modelsDir = "src\\test\\resources\\opennlp\\tools\\disambiguator\\";
+	
+	private static SentenceDetectorME sdetector;
+	private static Tokenizer tokenizer;
+	private static POSTaggerME tagger;
+	private static NameFinderME nameFinder;
+	private static SimpleLemmatizer lemmatizer;
+
+	private static Dictionary dictionary;
+	private static MorphologicalProcessor morph;
+	private static boolean IsInitialized = false;  
+
+	// local caches for faster lookup
+	private static HashMap<String,Object> stemCache;
+	private static HashMap<String,Object> stopCache;
+	private static HashMap<String,Object> relvCache;
+	
+	
+
+	// Constructor
+	public Loader(){
+		super();
+		load();
+	}
+
+	public static HashMap<String,Object> getRelvCache(){
+		if (relvCache==null || relvCache.keySet().isEmpty()){
+			relvCache = new HashMap<String, Object>();
+			for (String t : Constants.relevantPOS){
+				relvCache.put(t, null);
+			}
+		}
+		return relvCache;
+	}
+	
+	public static HashMap<String,Object> getStopCache(){
+		if (stopCache==null || stopCache.keySet().isEmpty()){
+			stopCache = new HashMap<String, Object>();
+			for (String s : Constants.stopWords){
+				stopCache.put(s, null);
+			}
+		}
+		return stopCache;
+	}
+	
+	public static HashMap<String,Object> getStemCache(){
+		if (stemCache==null || stemCache.keySet().isEmpty()){
+			stemCache = new HashMap<String,Object>();
+			for (Object pos : POS.getAllPOS()){
+				stemCache.put(((POS)pos).getKey(),new HashMap());
+			}
+		}
+		return stemCache;
+	}
+	
+	public static MorphologicalProcessor getMorph(){
+		if (morph==null){
+			morph 		= dictionary.getMorphologicalProcessor();
+		}
+		return morph;
+	}
+
+	public static Dictionary getDictionary(){
+		if (dictionary==null){
+			try {
+				dictionary 	= Dictionary.getDefaultResourceInstance();
+			} catch (JWNLException e) {
+				e.printStackTrace();
+			}
+		}
+		return dictionary;
+	}
+
+	public static SimpleLemmatizer getLemmatizer(){
+		if (lemmatizer==null){
+			try {
+				lemmatizer = new SimpleLemmatizer (new FileInputStream(modelsDir + "en-lemmatizer.dict"));
+			} catch (IOException e) {
+				e.printStackTrace();
+			}
+		}
+		
+		return lemmatizer;
+	}
+
+	public static NameFinderME getNameFinder(){
+		if (nameFinder==null){
+			TokenNameFinderModel nameFinderModel;
+			try {
+				nameFinderModel = new TokenNameFinderModel(new FileInputStream(modelsDir + "en-ner-person.bin"));
+				nameFinder = new NameFinderME(nameFinderModel);
+			} catch (IOException e) {
+				e.printStackTrace();
+			}
+		}
+		return nameFinder;
+	}
+
+	public static POSTaggerME getTagger(){
+		if (tagger==null){
+			POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir + "en-pos-maxent.bin"));
+			tagger = new POSTaggerME(posTaggerModel);
+		}
+		return tagger;
+	}
+
+	public static SentenceDetectorME getSDetector(){
+		if (sdetector==null){
+			try {
+				SentenceModel enSentModel = new SentenceModel(new FileInputStream(modelsDir + "en-sent.bin"));
+				sdetector = new SentenceDetectorME(enSentModel);
+			} catch (IOException e) {
+				e.printStackTrace();
+			}
+		}
+		return sdetector;
+	}
+
+	public static Tokenizer getTokenizer(){
+		if (tokenizer == null){
+			try {
+				TokenizerModel  tokenizerModel = new TokenizerModel(new FileInputStream(modelsDir + "en-token.bin"));
+				tokenizer = new TokenizerME(tokenizerModel);
+			} catch (IOException e) {
+				e.printStackTrace();
+			}
+
+		}
+		return tokenizer;
+	}
+
+	public static boolean isInitialized(){
+		return (dictionary !=null
+				&& morph		!=null 
+				&& stemCache	!=null 
+				&& stopCache	!=null
+				&& relvCache 	!=null);
+	}
+	
+	public void load(){
+		try {
+			SentenceModel enSentModel = new SentenceModel(new FileInputStream(modelsDir + "en-sent.bin"));
+			sdetector = new SentenceDetectorME(enSentModel);
+
+			TokenizerModel TokenizerModel = new TokenizerModel(new FileInputStream(modelsDir + "en-token.bin"));
+			tokenizer = new TokenizerME(TokenizerModel);
+
+
+			POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir + "en-pos-maxent.bin"));
+			tagger = new POSTaggerME(posTaggerModel);
+
+			TokenNameFinderModel nameFinderModel = new TokenNameFinderModel(new FileInputStream(modelsDir + "en-ner-person.bin"));
+			nameFinder = new NameFinderME(nameFinderModel);
+
+			lemmatizer = new SimpleLemmatizer (new FileInputStream(modelsDir + "en-lemmatizer.dict"));
+
+			dictionary 	= Dictionary.getDefaultResourceInstance();
+			morph 		= dictionary.getMorphologicalProcessor();
+
+			// loading lookup caches 
+			stemCache = new HashMap();
+			for (Object pos : POS.getAllPOS()){
+				stemCache.put(((POS)pos).getKey(),new HashMap());
+			}
+
+			stopCache = new HashMap<String, Object>();
+			for (String s : Constants.stopWords){
+				stopCache.put(s, null);
+			}
+			relvCache = new HashMap<String, Object>();
+			for (String t : Constants.relevantPOS){
+				relvCache.put(t, null);
+			}
+
+
+			if (isInitialized()){
+				Constants.print("loading was succesfull");
+			}else{
+				Constants.print("loading was unsuccesfull");
+			}
+
+		} catch (FileNotFoundException e) {
+			e.printStackTrace();
+		} catch (InvalidFormatException e) {
+			e.printStackTrace();
+		} catch (IOException e) {
+			e.printStackTrace();
+		} catch (JWNLException e) {
+			e.printStackTrace();
+		}
+	}
+
+	public static void unload ()
+	{ 
+		dictionary.close();
+	}
+
+
+
+}

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,158 @@
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+
+import net.sf.extjwnl.JWNLException;
+import net.sf.extjwnl.data.PointerUtils;
+import net.sf.extjwnl.data.Synset;
+import net.sf.extjwnl.data.Word;
+import net.sf.extjwnl.data.list.PointerTargetNode;
+import net.sf.extjwnl.data.list.PointerTargetNodeList;
+
+
+
+/**
+ * Convenience class to access some features.
+ */
+
+public class Node {
+
+  public Synset parent;
+  public Synset synset;
+  
+  protected ArrayList<WordPOS> senseRelevantWords;
+
+  public ArrayList<Synset> hypernyms = new ArrayList<Synset>();
+  public ArrayList<Synset> hyponyms = new ArrayList<Synset>();
+  public ArrayList<Synset> meronyms = new ArrayList<Synset>();
+  public ArrayList<Synset> holonyms = new ArrayList<Synset>();
+  
+  public ArrayList<WordPOS> synonyms = new ArrayList<WordPOS>();
+  
+  
+  public Node(Synset parent, Synset synSet, ArrayList<WordPOS> senseRelevantWords) {
+	    this.parent = parent;
+	    this.synset = synSet;
+	    this.senseRelevantWords = senseRelevantWords;
+	  }
+	  
+  public Node(Synset synSet, ArrayList<WordPOS> senseRelevantWords) {
+		    this.synset = synSet;
+		    this.senseRelevantWords = senseRelevantWords;
+	    }
+  
+  
+	public ArrayList<WordPOS> getSenseRelevantWords() {
+		return senseRelevantWords;
+	}
+
+	public void setSenseRelevantWords(ArrayList<WordPOS> senseRelevantWords) {
+		this.senseRelevantWords = senseRelevantWords;
+	}
+	  
+  public String getSense() {
+    return this.synset.getGloss().toString();
+  }
+
+  
+  public void setHypernyms() {
+  //  PointerUtils pointerUtils = PointerUtils.get();
+    PointerTargetNodeList phypernyms = new PointerTargetNodeList();
+    try {
+      phypernyms = PointerUtils.getDirectHypernyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  hypernyms");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < phypernyms.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) phypernyms.get(i);
+      this.hypernyms.add(ptn.getSynset());
+    }
+
+  }
+
+  public void setMeronyms() {
+    //PointerUtils pointerUtils = PointerUtils.getInstance();
+    PointerTargetNodeList pmeronyms = new PointerTargetNodeList();
+    try {
+    	pmeronyms = PointerUtils.getMeronyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  meronyms");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < pmeronyms.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) pmeronyms.get(i);
+      this.meronyms.add(ptn.getSynset());
+    }
+  }
+  
+  public void setHolonyms() {
+	   // PointerUtils pointerUtils = PointerUtils.getInstance();
+	    PointerTargetNodeList pholonyms = new PointerTargetNodeList();
+	    try {
+	    	pholonyms = PointerUtils.getHolonyms(this.synset);
+	    } catch (JWNLException e) {
+	      e.printStackTrace();
+	    } catch (NullPointerException e) {
+	      System.err.println("Error finding the  holonyms");
+	      e.printStackTrace();
+	    }
+
+	    for (int i = 0; i < pholonyms.size(); i++) {
+	      PointerTargetNode ptn = (PointerTargetNode) pholonyms.get(i);
+	      this.holonyms.add(ptn.getSynset());
+	    }
+
+	  }
+  
+  public void setHyponyms() {
+	  //  PointerUtils pointerUtils = PointerUtils.getInstance();
+	    PointerTargetNodeList phyponyms = new PointerTargetNodeList();
+	    try {
+	      phyponyms = PointerUtils.getDirectHyponyms(this.synset);
+	    } catch (JWNLException e) {
+	      e.printStackTrace();
+	    } catch (NullPointerException e) {
+	      System.err.println("Error finding the  hyponyms");
+	      e.printStackTrace();
+	    }
+
+	    for (int i = 0; i < phyponyms.size(); i++) {
+	      PointerTargetNode ptn = (PointerTargetNode) phyponyms.get(i);
+	      this.hyponyms.add(ptn.getSynset());
+	    }
+	  }
+  
+  public void setSynonyms()
+  {
+    for (Word word : synset.getWords())
+      synonyms.add(new WordPOS(word.toString(),word.getPOS()));
+  }
+  
+  public ArrayList<Synset> getHypernyms() {
+	  return hypernyms;
+  }
+  
+  public ArrayList<Synset> getHyponyms() {
+	  return hyponyms;
+  }
+  
+  public ArrayList<Synset> getMeronyms() {
+	  return meronyms;
+  }
+  public ArrayList<Synset> getHolonyms() {
+	  return holonyms;
+  }
+
+  public ArrayList<WordPOS> getSynonyms()
+  {
+    return synonyms;
+  }
+
+}

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,163 @@
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import net.sf.extjwnl.JWNLException;
+import net.sf.extjwnl.data.IndexWord;
+import net.sf.extjwnl.data.POS;
+import opennlp.tools.util.Span;
+
+
+
+public class PreProcessor {
+
+	public PreProcessor() {
+		super();
+	}
+
+	public static String[] split(String text) {
+		return Loader.getSDetector().sentDetect(text);
+	}
+
+	public static String[] tokenize(String sentence) {
+		return Loader.getTokenizer().tokenize(sentence);		
+	}
+
+	public static String[] tag(String[] tokenizedSentence) {
+		return Loader.getTagger().tag(tokenizedSentence);
+	}
+
+	public static String lemmatize(String word, String posTag) {
+		return Loader.getLemmatizer().lemmatize(word, posTag);
+	}
+
+	public static boolean isName(String word) {
+		Span nameSpans[] = Loader.getNameFinder().find(new String[] { word });
+		return (nameSpans.length != 0);
+	}
+
+	public static ArrayList<WordPOS> getAllRelevantWords(String[]  sentence) {
+
+		ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
+
+		String[] tags = tag(sentence);
+
+		for (int i = 0; i<sentence.length; i++) {
+			if (!Loader.getStopCache().containsKey(sentence[i])) {
+				if (Loader.getRelvCache().containsKey(tags[i])) {
+					relevantWords.add(new WordPOS(sentence[i],Constants.getPOS(tags[i])));	
+				}
+
+			}
+		}
+		return relevantWords;
+	}
+
+
+	public static ArrayList<WordPOS> getAllRelevantWords(WordToDisambiguate word) {
+		return getAllRelevantWords(word.getSentence());
+	}
+
+
+	public static ArrayList<WordPOS> getRelevantWords(WordToDisambiguate word, int winBackward, int winForward) {
+
+		ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
+
+		String[] sentence = word.getSentence();
+		String[] tags = tag(sentence);
+
+		int index = word.getWordIndex();
+
+		for (int i = index - winBackward; i<=index + winForward; i++) {
+
+			if (i >= 0 && i < sentence.length && i != index) {
+				if (!Loader.getStopCache().containsKey(sentence[i])) {
+
+					if (Loader.getRelvCache().containsKey(tags[i])) {
+						relevantWords.add(new WordPOS(sentence[i],Constants.getPOS(tags[i])));	
+					}
+
+				}
+			}
+		}
+		return relevantWords;
+	}
+
+	
+	/**
+	 * Stem a single word with WordNet dictionnary
+	 * 
+	 * @param wordToStem
+	 *            word to be stemmed
+	 * @return stemmed list of words
+	 */
+	public static List StemWordWithWordNet(WordPOS wordToStem) {
+		if (!Loader.isInitialized() 
+				|| wordToStem == null)
+			return null;
+		ArrayList<String> stems = new ArrayList();
+		try {
+			for (Object pos : POS.getAllPOS()){
+				stems.addAll(Loader.getMorph().lookupAllBaseForms((POS)pos, wordToStem.getWord())) ;
+			}
+			
+			if (stems.size()>0)
+				return stems;
+			else{
+				return null;
+			}
+			
+		} catch (JWNLException e) {
+			e.printStackTrace();
+		}
+		return null;
+	}
+
+	/**
+	 * Stem a single word tries to look up the word in the stemCache HashMap If
+	 * the word is not found it is stemmed with WordNet and put into stemCache
+	 * 
+	 * @param wordToStem
+	 *            word to be stemmed
+	 * @return stemmed word list, null means the word is incorrect 
+	 */
+	public static List Stem(WordPOS wordToStem) {
+		
+		// check if we already cached the stem map
+		HashMap posMap 	 	= (HashMap) Loader.getStemCache().get(wordToStem.getPOS().getKey());
+		
+		// don't check words with digits in them
+		if (containsNumbers(wordToStem.getWord())){
+			return null;
+		}
+						
+		List stemList = (List) posMap.get(wordToStem.getWord());
+		if (stemList != null){ // return it if we already cached it
+			return stemList;
+			
+		} else { // unCached list try to stem it
+			stemList = StemWordWithWordNet(wordToStem);
+			if (stemList != null) {
+				// word was recognized and stemmed with wordnet:
+				// add it to cache and return the stemmed list
+				posMap.put(wordToStem.getWord(),stemList);
+				Loader.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
+				return stemList;
+			}else{ // could not be stemmed add it anyway (as incorrect with null list)
+				posMap.put(wordToStem.getWord(), null);
+				Loader.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
+				return null;
+			}
+		}
+	}
+	
+	public static boolean containsNumbers(String word) {
+		// checks if the word is or contains a number
+		return word.matches(".*[0-9].*");
+	}
+	
+	
+	
+}

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,15 @@
+package opennlp.tools.disambiguator;
+
+import opennlp.tools.util.Span;
+
+/**
+ * The interface for word sense disambiguators.
+ */
+public interface WSDisambiguator {
+	
+	public String[] disambiguate(String[] inputText,int inputWordIndex); 
+	
+	public String[] disambiguate(String[] inputText, Span[] inputWordSpans);
+	
+
+}
\ No newline at end of file

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,94 @@
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import net.sf.extjwnl.JWNLException;
+import net.sf.extjwnl.data.IndexWord;
+import net.sf.extjwnl.data.POS;
+import net.sf.extjwnl.data.Synset;
+import net.sf.extjwnl.dictionary.Dictionary;
+
+
+public class WordPOS {
+
+	private String word;
+	private List stems;
+	private POS pos;
+
+	// Constructor
+	public WordPOS(String word, POS pos) throws IllegalArgumentException{
+		if (word==null || pos ==null){
+			throw new IllegalArgumentException("Args are null");
+		}
+		this.word = word;
+		this.pos = pos;
+	}
+
+	public String getWord() {
+		return word;
+	}
+
+	public POS getPOS() {
+		return pos;
+	}
+
+	public List getStems() {
+		if (stems==null){
+			return PreProcessor.Stem(this);
+		}else{
+			return stems;
+		}
+	}
+
+
+	// Return the synsets (thus the senses) of the current word
+	public ArrayList<Synset> getSynsets() {
+
+		IndexWord indexWord;
+		try {
+			indexWord = Loader.getDictionary().lookupIndexWord(pos, word);
+			List<Synset> synsets = indexWord.getSenses();
+			return (new ArrayList<Synset>(synsets));
+		} catch (JWNLException e) {
+			e.printStackTrace();
+		}
+		return null;
+	}
+
+	// uses Stemming to check if two words are equivalent
+	public boolean isStemEquivalent(WordPOS wordToCompare) {
+		// check if there is intersection in the stems;
+		List originalList  = this.getStems();
+		List listToCompare = wordToCompare.getStems();
+
+//		Constants.print("+++++++++++++++++++++  ::: "+ this.getWord());
+//		Constants.print("+++++++++++++++++++++  ::: "+ wordToCompare.getWord());
+//		Constants.print("the first list is \n"+originalList.toString());
+//		Constants.print("the second list is \n"+listToCompare.toString());
+
+		if(originalList==null || listToCompare==null){ // any of the two requested words do not exist
+			return false;
+		}else{
+			return !Collections.disjoint(originalList, listToCompare);
+		}
+
+	}
+
+
+	// uses Lemma to check if two words are equivalent
+	public boolean isLemmaEquivalent(WordPOS wordToCompare) {
+		// TODO use lemmatizer to compare with lemmas
+ 				
+		ArrayList<String> lemmas_word = new ArrayList();
+		ArrayList<String> lemmas_wordToCompare = new ArrayList();
+		
+		for (String pos : Constants.allPOS){
+			Loader.getLemmatizer().lemmatize(wordToCompare.getWord(), pos);
+		}
+		return false;
+	}
+
+}

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,75 @@
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+import java.util.concurrent.Semaphore;
+
+import opennlp.tools.disambiguator.lesk.WTDLesk;
+
+public class WordSense implements Comparable{ 
+	
+	protected WTDLesk WTDLesk;
+	protected Node node;
+	protected int id;
+	protected double score;
+	
+	
+	public WordSense(WTDLesk WTDLesk, Node node) {
+		super();
+		this.WTDLesk = WTDLesk;
+		this.node = node;
+	}
+
+	public WordSense() {
+		super();
+	}
+
+	
+	public WTDLesk getWTDLesk() {
+		return WTDLesk;
+	}
+
+	public void setWTDLesk(WTDLesk WTDLesk) {
+		this.WTDLesk = WTDLesk;
+	}
+
+	
+	public Node getNode() {
+		return node;
+	}
+
+	public void setNode(Node node) {
+		this.node = node;
+	}
+
+	
+	public double getScore() {
+		return score;
+	}
+
+	public void setScore(double score) {
+		this.score = score;
+	}
+
+	public int getId() {
+		return id;
+	}
+
+	public void setId(int id) {
+		this.id = id;
+	}
+
+
+	public int compareTo(Object o) {
+		return (this.score-((WordSense)o).score)<0?1:-1;
+	}
+	
+	
+	public String getSense() {
+		return node.getSense();
+	}
+	
+
+
+}
+
+

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,95 @@
+package opennlp.tools.disambiguator;
+
+
+
+public class WordToDisambiguate {
+	
+	protected String [] sentence;
+	protected int wordIndex;
+	protected String posTag;
+
+	protected int sense;
+	
+	
+	
+	/**
+	 * Constructor
+	 */
+	
+	
+	public WordToDisambiguate(String[] sentence, int wordIndex, int sense) throws IllegalArgumentException{
+		super();
+		
+		if (wordIndex>sentence.length){
+			throw new IllegalArgumentException("The index is out of bounds !");
+		}
+		this.sentence = sentence;
+		this.wordIndex = wordIndex;
+		String[] posTags = PreProcessor.tag(sentence);
+		this.posTag = posTags[wordIndex];
+		this.sense = sense;
+	}
+	
+	public WordToDisambiguate(String[] sentence, int wordIndex) {
+		this(sentence,wordIndex,-1);
+	}
+	
+		
+	
+	/**
+	 * Getters and Setters
+	 */
+	
+	
+
+	// sentence
+	public String[] getSentence() {
+		return sentence;
+	}
+
+	public void setSentence(String[] sentence) {
+		this.sentence = sentence;
+	}
+
+	
+	// word
+	public int getWordIndex() {
+		return wordIndex;
+	}
+
+	public void setWordIndex(int wordIndex) {
+		this.wordIndex = wordIndex;
+	}
+	
+	public String getWord(){
+		return sentence[wordIndex];
+	}
+	
+	
+	// posTag
+	public String getPosTag() {
+		return posTag;
+	}
+
+	public void setPosTag(String posTag) {
+		this.posTag = posTag;
+	}
+	
+	
+	// sense
+	public int getSense() {
+		return sense;
+	}
+
+	public void setSense(int sense) {
+		this.sense = sense;
+	}
+
+
+
+	
+	
+
+	
+
+}

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,112 @@
+package opennlp.tools.disambiguator.ims;
+
+import java.util.ArrayList;
+
+import opennlp.tools.disambiguator.Constants;
+import opennlp.tools.disambiguator.Loader;
+
+
+public class FeaturesExtractor {
+
+	
+	
+	public FeaturesExtractor() {
+		super();
+	}
+
+
+	/**
+	 * @Algorithm: IMS (It Makes Sense)
+	 * 
+	 * The following methods serve to extract the features for the algorithm IMS.
+	 */
+	
+	public String[] extractPosOfSurroundingWords (String[] sentence, int wordIndex, int numberOfWords) {
+		
+		String[] taggedSentence = Loader.getTagger().tag(sentence);
+		
+		String[] tags = new String[2*numberOfWords+1];
+		
+		int j = 0;
+		
+		for (int i = wordIndex - numberOfWords; i < wordIndex + numberOfWords ; i++) {
+			if (i < 0 || i >= sentence.length) {
+				tags[j] = "null";
+			} else {
+				tags[j] = taggedSentence[i];
+			}
+			j++;
+		}
+		
+		return tags;
+	}
+	
+	
+	public String[] extractSurroundingWords(String[] sentence, int wordIndex) {
+
+		String[] posTags = Loader.getTagger().tag(sentence);
+		
+		Constants.print(posTags);
+		
+		ArrayList<String> contextWords = new ArrayList<String>();
+
+		for (int i = 0; i < sentence.length; i++) {
+
+			if (!Constants.stopWords.contains(sentence[i].toLowerCase())
+					&& (wordIndex != i)) {
+				
+				String word = sentence[i].toLowerCase().replaceAll("[^a-z]", "").trim();
+				
+				if (!word.equals("")) {
+					String lemma = Loader.getLemmatizer().lemmatize(sentence[i], posTags[i]);
+					contextWords.add(lemma);
+				}
+				
+				
+
+
+			}
+		}
+
+		return contextWords.toArray(new String[contextWords.size()]);
+	}
+	
+	
+	public ArrayList<String[]> extractLocalCollocations(String[] sentence, int wordIndex, int range) {
+		/**
+		 * Here the author used only 11 features of this type. the range was set to 3 (bigrams extracted in a way that they are at max separated
+		 * by 1 word).
+		 */
+		
+		ArrayList<String[]> localCollocations = new ArrayList<String[]>();
+		
+		for (int i = wordIndex - range; i <= wordIndex + range ; i++) {
+			
+			if (!(i < 0 || i > sentence.length - 2)) {
+					if ((i != wordIndex) && (i+1 != wordIndex) && (i+1 < wordIndex + range)) {
+						String[] lc =  {sentence[i], sentence[i+1]};
+						localCollocations.add(lc);
+					}
+					if ((i != wordIndex) && (i+2 != wordIndex) && (i+2 < wordIndex + range)) {
+						String[] lc =  {sentence[i], sentence[i+2]};
+						localCollocations.add(lc);
+					}
+			}
+			
+		}
+		
+		return localCollocations;
+	}
+
+	
+	/**
+	 * @Algorithm: SST
+	 * 
+	 * The following methods serve to extract the features for the algorithm SST.
+	 */
+	
+	
+	
+	
+	
+}

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,99 @@
+package opennlp.tools.disambiguator.ims;
+
+import java.util.ArrayList;
+
+import opennlp.tools.disambiguator.WSDisambiguator;
+import opennlp.tools.util.Span;
+
+public class IMS implements WSDisambiguator{
+	
+	FeaturesExtractor fExtractor = new FeaturesExtractor();
+	
+	/**
+	 * PARAMETERS
+	 */
+	
+	int numberOfSurroundingWords;
+	int ngram;
+	
+	
+	
+	/**
+	 * Constructors
+	 */
+	
+	public IMS() {
+		super();
+		numberOfSurroundingWords = 3;
+		ngram = 2;
+	}
+	
+	public IMS(int numberOfSurroundingWords, int ngram) {
+		super();
+		this.numberOfSurroundingWords = numberOfSurroundingWords;
+		this.ngram = ngram;
+	}
+	
+	
+	
+	/**
+	 * INTERNAL METHODS
+	 */
+	
+	private void extractFeature(ArrayList<WTDIMS> words) {
+		
+		for (WTDIMS word : words) {
+			
+			word.setPosOfSurroundingWords(fExtractor.extractPosOfSurroundingWords(word.getSentence(), word.getWordIndex(), numberOfSurroundingWords));
+			
+			word.setSurroundingWords(fExtractor.extractSurroundingWords(word.getSentence(), word.getWordIndex()));
+			
+			word.setLocalCollocations(fExtractor.extractLocalCollocations(word.getSentence(), word.getWordIndex(), ngram));
+			
+		}
+
+	}
+	
+	private ArrayList<WTDIMS> extractTrainingData(String xmlFile) {
+		
+		ArrayList<WTDIMS> trainingData = new ArrayList<WTDIMS>();
+		
+		/**
+		 * TODO Processing of the xml File here (To check the format of the data)
+		 */
+		
+		return trainingData;
+	}
+	
+	
+	public void train(String trainingSetFile) { // TODO To revise after finihsing the implementation of the collector
+		
+		ArrayList<WTDIMS> instances = extractTrainingData(trainingSetFile);
+		
+		extractFeature(instances);
+		
+		
+		
+	}
+	
+	
+	public void load (String binFile) {
+		// TODO After finishing training the training data
+				
+	}
+	
+
+	@Override
+	public String[] disambiguate(String[] inputText, int inputWordIndex) {
+		// TODO Auto-generated method stub
+		return null;
+	}
+
+	@Override
+	public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
+		// TODO Auto-generated method stub
+		return null;
+	}
+	
+
+}

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,56 @@
+package opennlp.tools.disambiguator.ims;
+import java.util.ArrayList;
+
+import opennlp.tools.disambiguator.WordToDisambiguate;
+
+
+public class WTDIMS extends WordToDisambiguate {
+		
+	protected String[] posOfSurroundingWords;
+	protected String[] surroundingWords;
+	protected ArrayList<String[]> localCollocations;
+	
+	
+	
+	/**
+	 * Constructor
+	 */
+	public WTDIMS(String[] sentence, int word, int sense) {
+		super(sentence, word, sense);
+	}
+
+	
+	
+	/**
+	 * Getters and Setters
+	 */
+	
+	
+	public String[] getPosOfSurroundingWords() {
+		return posOfSurroundingWords;
+	}
+
+	public void setPosOfSurroundingWords(String[] posOfSurroundingWords) {
+		this.posOfSurroundingWords = posOfSurroundingWords;
+	}
+	
+
+	public String[] getSurroundingWords() {
+		return surroundingWords;
+	}
+
+	public void setSurroundingWords(String[] surroundingWords) {
+		this.surroundingWords = surroundingWords;
+	}
+
+	
+	public ArrayList<String[]> getLocalCollocations() {
+		return localCollocations;
+	}
+
+	public void setLocalCollocations(ArrayList<String[]> localCollocations) {
+		this.localCollocations = localCollocations;
+	}
+	
+	
+}

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,685 @@
+package opennlp.tools.disambiguator.lesk;
+
+import java.security.InvalidParameterException;
+import java.util.ArrayList;
+
+
+
+import java.util.Collections;
+import java.util.Map;
+
+import opennlp.tools.disambiguator.Constants;
+import opennlp.tools.disambiguator.Loader;
+import opennlp.tools.disambiguator.Node;
+import opennlp.tools.disambiguator.PreProcessor;
+import opennlp.tools.disambiguator.WSDisambiguator;
+import opennlp.tools.disambiguator.WordPOS;
+import opennlp.tools.disambiguator.WordSense;
+import opennlp.tools.util.Span;
+import net.sf.extjwnl.data.Synset;
+
+
+/**
+ * Class for the Lesk algorithm and variants.
+ */
+
+public class Lesk implements WSDisambiguator{
+
+	protected LeskParameters params;
+
+	public Loader loader;
+
+	public Lesk(){
+		this(null);
+	}
+
+	public Lesk(LeskParameters params) throws InvalidParameterException{
+		loader = new Loader();
+		this.setParams(params);
+	}
+
+	public void setParams(LeskParameters params) throws InvalidParameterException{
+		if(params==null){
+			this.params = new LeskParameters();
+		} 
+		else{
+			if (params.isValid()){
+				this.params = params;
+			}else{
+				throw new InvalidParameterException("wrong params");
+			}
+		}
+	}
+
+	public ArrayList<WordSense> basic(WTDLesk wtd) {
+
+		ArrayList<WordPOS> relvWords = PreProcessor.getAllRelevantWords(wtd);
+		WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+
+		ArrayList<Synset> synsets = word.getSynsets();
+		ArrayList<Node> nodes = new ArrayList<Node>();
+
+		for (Synset synset : synsets) {
+			Node node = new Node(synset, relvWords);
+			nodes.add(node);
+		}
+
+		ArrayList<WordSense> scoredSenses = updateSenses(nodes);
+
+		for (WordSense wordSense : scoredSenses) {
+			wordSense.setWTDLesk(wtd);
+			int count = 0;
+			for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
+				ArrayList stems =  (ArrayList)PreProcessor.Stem(senseWordPOS);
+				for (WordPOS sentenceWordPOS : relvWords) {
+					// TODO change to lemma check
+					if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
+						count = count + 1;
+					}
+				}
+			}
+			wordSense.setScore(count);
+		}
+
+		return scoredSenses;
+	}
+
+	public ArrayList<WordSense> basicContextual(WTDLesk wtd) {
+		return this.basicContextual(wtd,LeskParameters.DFLT_WIN_SIZE);
+	}
+
+	public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowSize) {
+		return this.basicContextual(wtd, windowSize,windowSize);
+	}
+
+	public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowBackward, int windowForward) {
+
+		ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd, windowBackward, windowForward);
+		WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+
+		ArrayList<Synset> synsets = word.getSynsets();
+		ArrayList<Node> nodes = new ArrayList<Node>();
+
+
+		for (Synset synset : synsets) {
+			Node node = new Node(synset, relvWords);
+			nodes.add(node);
+		}
+
+		ArrayList<WordSense> scoredSenses = updateSenses(nodes);
+
+
+		for (WordSense wordSense : scoredSenses) {
+			wordSense.setWTDLesk(wtd);
+
+			int count = 0;
+			for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
+
+				for (WordPOS sentenceWordPOS : relvWords) {
+					// TODO change to lemma check
+					if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
+						count = count + 1;
+					}
+				}
+
+			}
+			wordSense.setScore(count);
+
+		}
+
+		Collections.sort(scoredSenses);
+
+		return scoredSenses;
+	}
+
+	public ArrayList<WordSense> extended(WTDLesk wtd,
+			int depth, double depthScoreWeight, boolean includeSynonyms,
+			boolean includeHypernyms, boolean includeHyponyms,
+			boolean includeMeronyms, boolean includeHolonyms) {
+
+		return extendedContextual(wtd, 0, depth,
+				depthScoreWeight, includeSynonyms, includeHypernyms,
+				includeHyponyms, includeMeronyms, includeHolonyms);
+
+	}
+
+	public ArrayList<WordSense> extendedContextual(WTDLesk wtd, 
+			int depth, double depthScoreWeight,
+			boolean includeSynonyms, boolean includeHypernyms,
+			boolean includeHyponyms, boolean includeMeronyms,
+			boolean includeHolonyms){
+
+		return extendedContextual(wtd, LeskParameters.DFLT_WIN_SIZE,
+				depth, depthScoreWeight, includeSynonyms, includeHypernyms,
+				includeHyponyms, includeMeronyms, includeHolonyms);
+
+	}
+
+	public ArrayList<WordSense> extendedContextual(WTDLesk wtd, 
+			int windowSize, int depth, double depthScoreWeight,
+			boolean includeSynonyms, boolean includeHypernyms,
+			boolean includeHyponyms, boolean includeMeronyms,
+			boolean includeHolonyms) {
+
+		return extendedContextual(wtd, windowSize, windowSize,
+				depth, depthScoreWeight, includeSynonyms, includeHypernyms,
+				includeHyponyms, includeMeronyms, includeHolonyms);
+	}
+
+	public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
+			int windowBackward, int windowForward, int depth,
+			double depthScoreWeight, boolean includeSynonyms,
+			boolean includeHypernyms, boolean includeHyponyms,
+			boolean includeMeronyms, boolean includeHolonyms) {
+
+		ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd,windowBackward,windowForward);
+		WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+
+		ArrayList<Synset> synsets = word.getSynsets();
+		ArrayList<Node> nodes = new ArrayList<Node>();
+
+		for (Synset synset : synsets) {
+			Node node = new Node(synset, relvWords);
+			nodes.add(node);
+		}
+
+		ArrayList<WordSense> scoredSenses = basicContextual(wtd,windowBackward, windowForward);
+
+		for (WordSense wordSense : scoredSenses) {
+
+			if (includeSynonyms) {
+				wordSense.setScore(wordSense.getScore()
+						+ depthScoreWeight
+						* assessSynonyms(wordSense.getNode().getSynonyms(),relvWords));
+			}
+
+			if (includeHypernyms) {
+				fathomHypernyms(wordSense, wordSense.getNode().synset,
+						relvWords, depth, depth, depthScoreWeight);
+			}
+
+			if (includeHyponyms) {
+
+				fathomHyponyms(wordSense, wordSense.getNode().synset,
+						relvWords, depth, depth, depthScoreWeight);
+			}
+
+			if (includeMeronyms) {
+
+				fathomMeronyms(wordSense, wordSense.getNode().synset,
+						relvWords, depth, depth, depthScoreWeight);
+
+			}
+
+			if (includeHolonyms) {
+
+				fathomHolonyms(wordSense, wordSense.getNode().synset,
+						relvWords, depth, depth, depthScoreWeight);
+
+			}
+
+		}
+
+		return scoredSenses;
+
+	}
+
+	public ArrayList<WordSense> extendedExponential(WTDLesk wtd, 
+			int depth,
+			double intersectionExponent,double depthExponent, boolean includeSynonyms,
+			boolean includeHypernyms, boolean includeHyponyms,
+			boolean includeMeronyms, boolean includeHolonyms) {
+
+		return extendedExponentialContextual(wtd, 0,  depth,
+				intersectionExponent, depthExponent,  includeSynonyms,
+				includeHypernyms,  includeHyponyms,
+				includeMeronyms,  includeHolonyms);
+
+	}
+
+	public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd, 
+			int depth,
+			double intersectionExponent,double depthExponent, boolean includeSynonyms,
+			boolean includeHypernyms, boolean includeHyponyms,
+			boolean includeMeronyms, boolean includeHolonyms) {
+
+		return extendedExponentialContextual(wtd, LeskParameters.DFLT_WIN_SIZE,
+				depth, intersectionExponent,depthExponent, includeSynonyms, includeHypernyms,
+				includeHyponyms, includeMeronyms, includeHolonyms);
+	}
+
+	public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd, 
+			int windowSize, int depth,
+			double intersectionExponent,double depthExponent, boolean includeSynonyms,
+			boolean includeHypernyms, boolean includeHyponyms,
+			boolean includeMeronyms, boolean includeHolonyms) {
+
+		return extendedExponentialContextual(wtd, windowSize, windowSize,
+				depth, intersectionExponent,depthExponent, includeSynonyms, includeHypernyms,
+				includeHyponyms, includeMeronyms, includeHolonyms);
+	}
+
+	public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd, 
+			int windowBackward, int windowForward, int depth,
+			double intersectionExponent,double depthExponent, boolean includeSynonyms,
+			boolean includeHypernyms, boolean includeHyponyms,
+			boolean includeMeronyms, boolean includeHolonyms) {
+		ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd,windowBackward,windowForward);
+		WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+
+		ArrayList<Synset> synsets = word.getSynsets();
+		ArrayList<Node> nodes = new ArrayList<Node>();
+
+		for (Synset synset : synsets) {
+			Node node = new Node(synset, relvWords);
+			nodes.add(node);
+		}
+
+		ArrayList<WordSense> scoredSenses = basicContextual(wtd, windowForward, windowBackward);
+
+		for (WordSense wordSense : scoredSenses) {
+
+
+			if (includeSynonyms) {
+				wordSense.setScore(wordSense.getScore() + Math.pow(assessSynonyms(wordSense.getNode().getSynonyms(),
+						relvWords),intersectionExponent));
+			}
+
+			if (includeHypernyms) {
+				fathomHypernymsExponential(wordSense, wordSense.getNode().synset,
+						relvWords, depth, depth,intersectionExponent, depthExponent);
+			}
+
+			if (includeHyponyms) {
+
+				fathomHyponymsExponential(wordSense, wordSense.getNode().synset,
+						relvWords, depth, depth, intersectionExponent,depthExponent);
+			}
+
+			if (includeMeronyms) {
+
+				fathomMeronymsExponential(wordSense, wordSense.getNode().synset,
+						relvWords, depth, depth, intersectionExponent,depthExponent);
+
+			}
+
+			if (includeHolonyms) {
+
+				fathomHolonymsExponential(wordSense, wordSense.getNode().synset,
+						relvWords, depth, depth, intersectionExponent,depthExponent);
+
+			}
+
+		}
+
+		return scoredSenses;
+
+	}
+
+	private void fathomHypernyms(WordSense wordSense, Synset child,
+			ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+			double depthScoreWeight) {
+		if (depth == 0)
+			return;
+
+		String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
+		ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+
+
+		Node childNode = new Node(child, relvGlossWords);
+
+		childNode.setHypernyms();
+		wordSense.setScore(wordSense.getScore()
+				+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
+				* assessFeature(childNode.getHypernyms(), relvWords));
+		for (Synset hypernym : childNode.getHypernyms()) {
+			fathomHypernyms(wordSense, hypernym, relvGlossWords, depth - 1, maxDepth,
+					depthScoreWeight);
+		}
+	}
+
+	private void fathomHypernymsExponential(WordSense wordSense, Synset child,
+			ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+			double intersectionExponent, double depthScoreExponent) {
+		if (depth == 0)
+			return;
+
+		String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
+		ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+
+		Node childNode = new Node(child, relvGlossWords);
+
+		childNode.setHypernyms();
+		wordSense
+		.setScore(wordSense.getScore()
+				+ Math.pow(
+						assessFeature(childNode.getHypernyms(),
+								relvWords), intersectionExponent)
+								/ Math.pow(depth, depthScoreExponent));
+		for (Synset hypernym : childNode.getHypernyms()) {
+
+			fathomHypernymsExponential(wordSense, hypernym, relvGlossWords, depth - 1, maxDepth,
+					intersectionExponent, depthScoreExponent);
+		}
+	}
+
+	private void fathomHyponyms(WordSense wordSense, Synset child,
+			ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+			double depthScoreWeight) {
+		if (depth == 0)
+			return;
+
+		String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
+		ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+
+		Node childNode = new Node(child, relvGlossWords);
+
+		childNode.setHyponyms();
+		wordSense.setScore(wordSense.getScore()
+				+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
+				* assessFeature(childNode.getHyponyms(), relvWords));
+		for (Synset hyponym : childNode.getHyponyms()) {
+
+			fathomHyponyms(wordSense, hyponym, relvGlossWords, depth - 1, maxDepth,
+					depthScoreWeight);
+		}
+	}
+
+	private void fathomHyponymsExponential(WordSense wordSense, Synset child,
+			ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+			double intersectionExponent, double depthScoreExponent) {
+		if (depth == 0)
+			return;
+
+		String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
+		ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+
+		Node childNode = new Node(child, relvGlossWords);
+
+		childNode.setHyponyms();
+		wordSense.setScore(wordSense.getScore()
+				+ Math.pow(
+						assessFeature(childNode.getHyponyms(), relvWords),
+						intersectionExponent)
+						/ Math.pow(depth, depthScoreExponent));
+		for (Synset hyponym : childNode.getHyponyms()) {
+
+			fathomHyponymsExponential(wordSense, hyponym, relvGlossWords, depth - 1, maxDepth,
+					intersectionExponent, depthScoreExponent);
+		}
+	}
+
+	private void fathomMeronyms(WordSense wordSense, Synset child,
+			ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+			double depthScoreWeight) {
+		if (depth == 0)
+			return;
+
+		String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
+		ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+
+		Node childNode = new Node(child, relvGlossWords);
+
+		childNode.setMeronyms();
+		wordSense.setScore(wordSense.getScore()
+				+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
+				* assessFeature(childNode.getMeronyms(), relvWords));
+		for (Synset meronym : childNode.getMeronyms()) {
+
+			fathomMeronyms(wordSense, meronym, relvGlossWords, depth - 1, maxDepth,
+					depthScoreWeight);
+		}
+	}
+
+	private void fathomMeronymsExponential(WordSense wordSense, Synset child,
+			ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+			double intersectionExponent, double depthScoreExponent) {
+		if (depth == 0)
+			return;
+
+		String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
+		ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+
+		Node childNode = new Node(child, relvGlossWords);
+
+		childNode.setMeronyms();
+		wordSense.setScore(wordSense.getScore()
+				+ Math.pow(
+						assessFeature(childNode.getMeronyms(), relvWords),
+						intersectionExponent)
+						/ Math.pow(depth, depthScoreExponent));
+		for (Synset meronym : childNode.getMeronyms()) {
+
+			fathomMeronymsExponential(wordSense, meronym, relvGlossWords, depth - 1, maxDepth,
+					intersectionExponent, depthScoreExponent);
+		}
+	}
+
+	private void fathomHolonyms(WordSense wordSense, Synset child,
+			ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+			double depthScoreWeight) {
+		if (depth == 0)
+			return;
+
+		String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
+		ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+
+		Node childNode = new Node(child, relvGlossWords);
+
+
+		childNode.setHolonyms();
+		wordSense.setScore(wordSense.getScore()
+				+ Math.pow(depthScoreWeight, maxDepth - depth + 1)
+				* assessFeature(childNode.getHolonyms(), relvWords));
+		for (Synset holonym : childNode.getHolonyms()) {
+
+			fathomHolonyms(wordSense, holonym, relvGlossWords, depth - 1, maxDepth,
+					depthScoreWeight);
+		}
+	}
+
+	private void fathomHolonymsExponential(WordSense wordSense, Synset child,
+			ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+			double intersectionExponent, double depthScoreExponent) {
+		if (depth == 0)
+			return;
+
+		String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
+		ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
+
+		Node childNode = new Node(child, relvGlossWords);
+
+		childNode.setHolonyms();
+		wordSense.setScore(wordSense.getScore()
+				+ Math.pow(
+						assessFeature(childNode.getHolonyms(), relvWords),
+						intersectionExponent)
+						/ Math.pow(depth, depthScoreExponent));
+		for (Synset holonym : childNode.getHolonyms()) {
+
+			fathomHolonymsExponential(wordSense, holonym, relvGlossWords, depth - 1, maxDepth,
+					intersectionExponent, depthScoreExponent);
+		}
+	}
+
+	private int assessFeature(ArrayList<Synset> featureSynsets,
+			ArrayList<WordPOS> relevantWords) {
+		int count = 0;
+		for (Synset synset : featureSynsets) {
+			Node subNode = new Node(synset, relevantWords);
+
+			String[] tokenizedSense = Loader.getTokenizer().tokenize(subNode.getSense());
+			ArrayList<WordPOS> relvSenseWords = PreProcessor.getAllRelevantWords(tokenizedSense);
+
+			for (WordPOS senseWord : relvSenseWords) {
+				for (WordPOS sentenceWord : relevantWords) {
+					if (sentenceWord.isStemEquivalent(senseWord)) {
+						count = count + 1;
+					}
+				}
+			}
+		}
+		return count;
+	}
+
+	private int assessSynonyms(ArrayList<WordPOS> synonyms,
+			ArrayList<WordPOS> relevantWords) {
+		int count = 0;
+
+		for (WordPOS synonym : synonyms) {
+			for (WordPOS sentenceWord : relevantWords) {
+				// TODO try to switch to lemmatizer
+				if (sentenceWord.isStemEquivalent(synonym)) {
+					count = count + 1;
+				}
+			}
+
+		}
+
+		return count;
+	}
+
+	public ArrayList<WordSense> updateSenses(ArrayList<Node> nodes) {
+
+		ArrayList<WordSense> scoredSenses = new ArrayList<WordSense>();
+
+		for (int i=0; i< nodes.size(); i++ ) {
+			ArrayList<WordPOS> sensesComponents = PreProcessor.getAllRelevantWords(PreProcessor.tokenize(nodes.get(i).getSense()));
+			WordSense wordSense = new WordSense();
+			nodes.get(i).setSenseRelevantWords(sensesComponents);
+			wordSense.setNode(nodes.get(i));
+			wordSense.setId(i);
+			scoredSenses.add(wordSense);
+		}
+		return scoredSenses;
+
+	}
+
+	// disambiguates a WTDLesk and returns an array of sense indexes from WordNet ordered by their score
+	@Override
+	public String[] disambiguate(String[] inputText, int inputWordIndex) {
+		WTDLesk wtd = new WTDLesk(inputText,inputWordIndex);	
+		ArrayList<WordSense> wsenses = null;
+
+		switch(this.params.leskType){
+		case LESK_BASIC: 
+			wsenses = basic(wtd);
+			break;
+		case LESK_BASIC_CTXT : 
+			wsenses = basicContextual(wtd);
+			break;
+		case LESK_BASIC_CTXT_WIN : 
+			wsenses = basicContextual(wtd, this.params.win_b_size); 
+			break;
+		case LESK_BASIC_CTXT_WIN_BF : 
+			wsenses = basicContextual(wtd, this.params.win_b_size, this.params.win_f_size);
+			break;
+		case LESK_EXT : 
+			wsenses = extended(wtd, 
+					this.params.depth,
+					this.params.depth_weight, 
+					this.params.fathom_synonyms, 
+					this.params.fathom_hypernyms, 
+					this.params.fathom_hyponyms, 
+					this.params.fathom_meronyms, 
+					this.params.fathom_holonyms);
+			break;
+		case LESK_EXT_CTXT : 
+			wsenses = extendedContextual(wtd,
+					this.params.depth,
+					this.params.depth_weight, 
+					this.params.fathom_synonyms, 
+					this.params.fathom_hypernyms, 
+					this.params.fathom_hyponyms, 
+					this.params.fathom_meronyms, 
+					this.params.fathom_holonyms);
+			break;
+		case LESK_EXT_CTXT_WIN : 
+			wsenses = extendedContextual(wtd, 
+					this.params.win_b_size,
+					this.params.depth,
+					this.params.depth_weight, 
+					this.params.fathom_synonyms, 
+					this.params.fathom_hypernyms, 
+					this.params.fathom_hyponyms, 
+					this.params.fathom_meronyms, 
+					this.params.fathom_holonyms);
+			break;
+		case LESK_EXT_CTXT_WIN_BF :
+			wsenses = extendedContextual(wtd, 
+					this.params.win_b_size, 
+					this.params.win_f_size,
+					this.params.depth,
+					this.params.depth_weight, 
+					this.params.fathom_synonyms, 
+					this.params.fathom_hypernyms, 
+					this.params.fathom_hyponyms, 
+					this.params.fathom_meronyms, 
+					this.params.fathom_holonyms);
+			break;
+		case LESK_EXT_EXP : 
+			wsenses = extendedExponential(wtd, 
+					this.params.depth,
+					this.params.iexp,
+					this.params.dexp, 
+					this.params.fathom_synonyms, 
+					this.params.fathom_hypernyms, 
+					this.params.fathom_hyponyms, 
+					this.params.fathom_meronyms, 
+					this.params.fathom_holonyms);
+			break;
+		case LESK_EXT_EXP_CTXT : 
+			wsenses = extendedExponentialContextual(wtd,
+					this.params.depth,
+					this.params.iexp,
+					this.params.dexp, 
+					this.params.fathom_synonyms, 
+					this.params.fathom_hypernyms, 
+					this.params.fathom_hyponyms, 
+					this.params.fathom_meronyms, 
+					this.params.fathom_holonyms);
+			break;
+		case LESK_EXT_EXP_CTXT_WIN : 
+			wsenses = extendedExponentialContextual(wtd,
+					this.params.win_b_size,
+					this.params.depth,
+					this.params.iexp,
+					this.params.dexp, 
+					this.params.fathom_synonyms, 
+					this.params.fathom_hypernyms, 
+					this.params.fathom_hyponyms, 
+					this.params.fathom_meronyms, 
+					this.params.fathom_holonyms);
+			break;
+		case LESK_EXT_EXP_CTXT_WIN_BF : 
+			wsenses = extendedExponentialContextual(wtd,
+					this.params.win_b_size,
+					this.params.win_f_size,
+					this.params.depth,
+					this.params.iexp,
+					this.params.dexp, 
+					this.params.fathom_synonyms, 
+					this.params.fathom_hypernyms, 
+					this.params.fathom_hyponyms, 
+					this.params.fathom_meronyms, 
+					this.params.fathom_holonyms);
+			break;
+		}
+
+		wsenses = extendedExponentialContextual(wtd, LeskParameters.DFLT_WIN_SIZE,LeskParameters.DFLT_DEPTH,LeskParameters.DFLT_IEXP,LeskParameters.DFLT_DEXP, true,true,true,true,true );
+		Collections.sort(wsenses);
+
+		String[] senses = new String[wsenses.size()];
+		for (int i = 0; i < wsenses.size() ; i++) {
+			senses[i] = wsenses.get(i).getSense();
+		}
+		return senses;
+	}
+
+	@Override
+	public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
+		// TODO need to work on spans
+		return null;
+	}
+
+}
\ No newline at end of file

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,107 @@
+package opennlp.tools.disambiguator.lesk;
+
/**
 * Parameter bundle controlling which Lesk variant is executed and how it is
 * tuned.
 *
 * <p>A freshly constructed instance carries the stock configuration (see
 * {@link #setDefaults()}); callers then mutate the public fields directly
 * before handing the object to the disambiguator. {@link #isValid()} verifies
 * that the fields relevant to the selected {@link LESK_TYPE} hold sensible
 * values.</p>
 */
public class LeskParameters {

	/** Every supported variation of the Lesk algorithm. */
	public enum LESK_TYPE {
		LESK_BASIC,
		LESK_BASIC_CTXT,
		LESK_BASIC_CTXT_WIN,
		LESK_BASIC_CTXT_WIN_BF,
		LESK_EXT,
		LESK_EXT_CTXT,
		LESK_EXT_CTXT_WIN,
		LESK_EXT_CTXT_WIN_BF,
		LESK_EXT_EXP,
		LESK_EXT_EXP_CTXT,
		LESK_EXT_EXP_CTXT_WIN,
		LESK_EXT_EXP_CTXT_WIN_BF,
	}

	// Stock configuration values applied by setDefaults().
	protected static final LESK_TYPE DFLT_LESK_TYPE = LESK_TYPE.LESK_EXT_EXP_CTXT_WIN;
	protected static final int DFLT_WIN_SIZE = 4;
	protected static final int DFLT_DEPTH = 3;
	protected static final double DFLT_IEXP = 0.3;
	protected static final double DFLT_DEXP = 0.3;

	// Which Lesk variation to run.
	public LESK_TYPE leskType;
	// Context window sizes: forward and backward token counts around the target.
	public int win_f_size;
	public int win_b_size;
	// How deep to fathom the WordNet relation hierarchy.
	public int depth;

	// Which WordNet relations to fathom when extending the gloss.
	public boolean fathom_synonyms;
	public boolean fathom_hypernyms;
	public boolean fathom_hyponyms;
	public boolean fathom_meronyms;
	public boolean fathom_holonyms;

	// Scoring weights/exponents; depth_weight defaults to 0.0 (Java field default).
	public double depth_weight;
	public double iexp;
	public double dexp;

	public LeskParameters() {
		setDefaults();
	}

	/** Resets every field to the stock configuration. */
	public void setDefaults() {
		leskType = DFLT_LESK_TYPE;
		win_f_size = DFLT_WIN_SIZE;
		win_b_size = DFLT_WIN_SIZE;
		depth = DFLT_DEPTH;
		iexp = DFLT_IEXP;
		dexp = DFLT_DEXP;
		fathom_synonyms = true;
		fathom_hypernyms = true;
		fathom_hyponyms = true;
		fathom_meronyms = true;
		fathom_holonyms = true;
	}

	/**
	 * Checks that the fields required by the selected {@link LESK_TYPE} hold
	 * non-negative values.
	 *
	 * @return {@code true} when the current configuration is usable
	 */
	// TODO make isSet for semantic feature booleans
	public boolean isValid() {
		boolean windowOk = win_b_size >= 0 && win_f_size >= 0;
		boolean depthOk = depth >= 0 && depth_weight >= 0;
		boolean expOk = depth >= 0 && dexp >= 0 && iexp >= 0;

		switch (leskType) {
		case LESK_BASIC:
		case LESK_BASIC_CTXT:
			// No tunable fields involved.
			return true;
		case LESK_BASIC_CTXT_WIN:
			// Symmetric window: both sides must match and be non-negative.
			return win_b_size == win_f_size && win_b_size >= 0;
		case LESK_BASIC_CTXT_WIN_BF:
			return windowOk;
		case LESK_EXT:
		case LESK_EXT_CTXT:
			return depthOk;
		case LESK_EXT_CTXT_WIN:
		case LESK_EXT_CTXT_WIN_BF:
			return depthOk && windowOk;
		case LESK_EXT_EXP:
		case LESK_EXT_EXP_CTXT:
			return expOk;
		case LESK_EXT_EXP_CTXT_WIN:
		case LESK_EXT_EXP_CTXT_WIN_BF:
			return expOk && windowOk;
		default:
			return false;
		}
	}

}

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,15 @@
+package opennlp.tools.disambiguator.lesk;
+
+import opennlp.tools.disambiguator.WordToDisambiguate;
+
+
/**
 * The word-to-disambiguate representation used by the Lesk disambiguator.
 *
 * <p>Thin convenience subclass of {@link WordToDisambiguate} that fixes the
 * third superclass constructor argument to {@code -1}.</p>
 */
public class WTDLesk extends WordToDisambiguate{

	/**
	 * @param sentence  the tokenized sentence containing the target word
	 * @param wordIndex the index of the target word within {@code sentence}
	 */
	public WTDLesk(String[] sentence, int wordIndex) {
		// -1 : presumably "sense not yet determined" — TODO confirm against
		// the WordToDisambiguate constructor's third parameter semantics.
		super(sentence,wordIndex,-1);
	}
	

	
	
}
\ No newline at end of file

Added: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java?rev=1687358&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java Wed Jun 24 20:19:05 2015
@@ -0,0 +1,83 @@
+package opennlp.tools.disambiguator;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.junit.Test;
+
+import opennlp.tools.cmdline.postag.POSModelLoader;
+import opennlp.tools.disambiguator.Constants;
+import opennlp.tools.disambiguator.Loader;
+import opennlp.tools.disambiguator.WordSense;
+import opennlp.tools.disambiguator.ims.FeaturesExtractor;
+import opennlp.tools.disambiguator.lesk.Lesk;
+import opennlp.tools.disambiguator.lesk.LeskParameters;
+import opennlp.tools.disambiguator.lesk.WTDLesk;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSTagger;
+import opennlp.tools.postag.POSTaggerME;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+
+
+public class Tester {
+
+	@Test
+	public static void main(String[] args) {
+
+
+		String sentence = "I went fishing for some sea bass.";
+		TokenizerModel TokenizerModel;
+		
+		try {
+			TokenizerModel = new TokenizerModel(new FileInputStream("src\\test\\resources\\opennlp\\tools\\disambiguator\\en-token.bin"));
+			Tokenizer tokenizer = new TokenizerME(TokenizerModel);
+
+			String[] words = tokenizer.tokenize(sentence);
+			
+			POSModel posTaggerModel = new POSModelLoader().load(new File("src\\test\\resources\\opennlp\\tools\\disambiguator\\en-pos-maxent.bin"));
+			POSTagger tagger = new POSTaggerME(posTaggerModel);
+				
+
+			Constants.print("\ntokens :");
+			Constants.print(words);
+			Constants.print(tagger.tag(words));
+			
+			Constants.print("\ntesting default lesk :");			
+			Lesk lesk = new Lesk();		
+			Constants.print(lesk.disambiguate(words, 6));
+			
+			Constants.print("\ntesting with null params :");
+			lesk.setParams(null);
+			Constants.print(lesk.disambiguate(words, 6));
+			
+			Constants.print("\ntesting with default params");
+			lesk.setParams(new LeskParameters());
+			Constants.print(lesk.disambiguate(words, 6));
+			
+			Constants.print("\ntesting with custom params :");
+			LeskParameters leskParams = new LeskParameters();
+			leskParams.leskType = LeskParameters.LESK_TYPE.LESK_BASIC_CTXT_WIN_BF;
+			leskParams.win_b_size = 4;
+			leskParams.depth = 3;
+			lesk.setParams(leskParams);
+			Constants.print(lesk.disambiguate(words, 6));
+				
+			/*
+			Constants.print("\ntesting with wrong params should throw exception :");
+			LeskParameters leskWrongParams = new LeskParameters();
+			leskWrongParams.depth = -1;
+			lesk.setParams(leskWrongParams);
+			Constants.print(lesk.disambiguate(words, 6));
+			*/
+
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+
+
+	}
+
+}