You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2015/06/25 11:20:30 UTC

svn commit: r1687455 [1/2] - in /opennlp/sandbox/opennlp-wsd/src: main/java/opennlp/tools/disambiguator/ main/java/opennlp/tools/disambiguator/ims/ main/java/opennlp/tools/disambiguator/lesk/ test/java/opennlp/tools/disambiguator/

Author: joern
Date: Thu Jun 25 09:20:30 2015
New Revision: 1687455

URL: http://svn.apache.org/r1687455
Log:
OPENNLP-758 Formatted the code according to OpenNLP code conventions

Modified:
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java
    opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java Thu Jun 25 09:20:30 2015
@@ -5,130 +5,177 @@ import java.util.Arrays;
 
 import net.sf.extjwnl.data.POS;
 
-
 public class Constants {
-	
-	public static String osPathChar = "\\";
 
-	// List of all the PoS tags
-	public static String[] allPOS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ",
-			"JJR", "JJS", "LS", "MD", "NN", "NNS", "NNP", "NNPS", "PDT", "POS",
-			"PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH", "VB",
-			"VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB" };
-
-	// List of the PoS tags of which the senses are to be extracted
-	public static String[] relevantPOS = { "JJ", "JJR", "JJS", "NN", "NNS", "RB", "RBR", "RBS", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ" };
-
-	
-	// List of Negation Words
-	public static ArrayList<String> negationWords = new ArrayList<String>(
-			Arrays.asList("not", "no", "never", "none", "nor", "non"));
-		
-	// List of Stop Words
-	public static ArrayList<String> stopWords = new ArrayList<String>(Arrays.asList( "a", "able", "about", "above", "according", "accordingly", "across", "actually", "after",
-		"afterwards", "again", "against", "ain't", "all", "allow", "allows", "almost", "alone", "along", "already", "also",
-		"although", "always", "am", "among", "amongst", "an", "and", "another", "any", "anybody", "anyhow", "anyone", "anything",
-		"anyway", "anyways", "anywhere", "apart", "appear", "appreciate", "appropriate", "are", "aren't", "around", "as", "aside", "ask",
-		"asking", "associated", "at", "available", "away", "awfully", "be", "became", "because", "become", "becomes", "becoming", "been",
-		"before", "beforehand", "behind", "being", "believe", "below", "beside", "besides", "best", "better", "between", "beyond", "both",
-		"brief", "but", "by", "came", "can", "cannot", "cant", "can't", "cause", "causes", "certain", "certainly", "changes", "clearly",
-		"c'mon", "co", "com", "come", "comes", "concerning", "consequently", "consider", "considering", "contain", "containing",
-		"contains", "corresponding", "could", "couldn't", "course", "c's", "currently", "definitely", "described", "despite", "did", "didn't",
-		"different", "do", "does", "doesn't", "doing", "done", "don't", "down", "downwards", "during", "each", "edu", "eg", "eight",
-		"either", "else", "elsewhere", "enough", "entirely", "especially", "et", "etc", "even", "ever", "every", "everybody", "everyone",
-		"everything", "everywhere", "ex", "exactly", "example", "except", "far", "few", "fifth", "first", "five", "followed", "following",
-		"follows", "for", "former", "formerly", "forth", "four", "from", "further", "furthermore", "get", "gets", "getting", "given",
-		"gives", "go", "goes", "going", "gone", "got", "gotten", "greetings", "had", "hadn't", "happens", "hardly", "has", "hasn't",
-		"have", "haven't", "having", "he", "hello", "help", "hence", "her", "here", "hereafter", "hereby", "herein", "here's", "hereupon",
-		"hers", "herself", "he's", "hi", "him", "himself", "his", "hither", "hopefully", "how", "howbeit", "however", "i", "i'd", "ie", "if",
-		"ignored", "i'll", "i'm", "immediate", "in", "inasmuch", "inc", "indeed", "indicate", "indicated", "indicates", "inner", "insofar",
-		"instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll", "its", "it's", "itself", "i've", "just", "keep", "keeps", "kept",
-		"know", "known", "knows", "last", "lately", "later", "latter", "latterly", "least", "less", "lest", "let", "let's", "like",
-		"liked", "likely", "little", "look", "looking", "looks", "ltd", "mainly", "many", "may", "maybe", "me", "mean", "meanwhile",
-		"merely", "might", "more", "moreover", "most", "mostly", "much", "must", "my", "myself", "name", "namely", "nd", "near", "nearly",
-		"necessary", "need", "needs", "neither", "never", "nevertheless", "new", "next", "nine", "no", "nobody", "non", "none", "noone",
-		"nor", "normally", "not", "nothing", "novel", "now", "nowhere", "obviously", "of", "off", "often", "oh", "ok", "okay", "old", "on",
-		"once", "one", "ones", "only", "onto", "or", "other", "others", "otherwise", "ought", "our", "ours", "ourselves", "out", "outside",
-		"over", "overall", "own", "particular", "particularly", "per", "perhaps", "placed", "please", "plus", "possible", "presumably",
-		"probably", "provides", "que", "quite", "qv", "rather", "rd", "re", "really", "reasonably", "regarding", "regardless", "regards",
-		"relatively", "respectively", "right", "said", "same", "saw", "say", "saying", "says", "second", "secondly", "see", "seeing",
-		"seem", "seemed", "seeming", "seems", "seen", "self", "selves",	"sensible", "sent", "serious", "seriously", "seven", "several",
-		"shall", "she", "should", "shouldn't", "since", "six", "so", "some", "somebody", "somehow", "someone", "something", "sometime",
-		"sometimes", "somewhat", "somewhere", "soon", "sorry", "specified",	"specify", "specifying", "still", "sub", "such", "sup", "sure",
-		"take", "taken", "tell", "tends", "th", "than", "thank", "thanks", "thanx", "that", "thats", "that's", "the", "their", "theirs",
-		"them", "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein", "theres", "there's",
-		"thereupon", "these", "they", "they'd", "they'll", "they're", "they've", "think", "third", "this", "thorough", "thoroughly",
-		"those", "though", "three", "through", "throughout", "thru", "thus", "to", "together", "too", "took", "toward", "towards",
-		"tried", "tries", "truly", "try", "trying", "t's", "twice", "two", "un", "under", "unfortunately", "unless", "unlikely", "until",
-		"unto", "up", "upon", "us", "use", "used", "useful", "uses", "using", "usually", "value", "various", "very", "via", "viz", "vs",
-		"want", "wants", "was", "wasn't", "way", "we", "we'd", "welcome", "well", "we'll", "went", "were", "we're", "weren't", "we've",
-		"what", "whatever", "what's", "when", "whence", "whenever",	"where", "whereafter", "whereas", "whereby", "wherein", "where's",
-		"whereupon", "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "who's", "whose", "why", "will",
-		"willing", "wish", "with", "within", "without", "wonder", "won't", "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll",
-		"your", "you're", "yours", "yourself", "yourselves", "you've", "zero"));
-		
-	// Print a text in the console
-	public static void print(Object in) {
-		System.out.println(in);
-	}
-
-	public static void print(Object[] array) {
-		System.out.println(Arrays.asList(array));
-	}
-
-	public static void print(Object[][] array) {
-		System.out.print("[");
-		for (int i = 0; i < array.length; i++) {
-			print(array[i]);
-			if (i != array.length - 1) {
-				System.out.print("\n");
-			}
-			print("]");
-		}
-	}
-
-	// return the PoS (Class POS) out of the PoS-tag
-	public static POS getPOS(String posTag) {
-
-		ArrayList<String> adjective = new ArrayList<String>(Arrays.asList("JJ", "JJR", "JJS"));
-		ArrayList<String> adverb = new ArrayList<String>(Arrays.asList("RB", "RBR", "RBS"));
-		ArrayList<String> noun = new ArrayList<String>(Arrays.asList("NN", "NNS", "NNP", "NNPS"));
-		ArrayList<String> verb = new ArrayList<String>(Arrays.asList("VB", "VBD", "VBG", "VBN", "VBP", "VBZ"));
-
-		if (adjective.contains(posTag)) return POS.ADJECTIVE;
-		else if (adverb.contains(posTag)) return POS.ADVERB;
-		else if (noun.contains(posTag)) return POS.NOUN;
-		else if (verb.contains(posTag)) return POS.VERB;
-		else return null;
-
-	}
-	
-	// Check whether a list of arrays contains an array
-	public static boolean belongsTo(String[] array, ArrayList<String[]> fullList) {
-		for (String[] refArray : fullList) {
-			if (areStringArraysEqual(array, refArray))
-				return true;
-		}
-		return false;
-	}
-
-	// Check whether two arrays of strings are equal
-	public static boolean areStringArraysEqual(String[] array1, String[] array2) {
-
-		if (array1.equals(null) || array2.equals(null))
-			return false;
-
-		if (array1.length != array2.length) {
-			return false;
-		}
-		for (int i = 0; i < array1.length; i++) {
-			if (!array1[i].equals(array2[i])) {
-				return false;
-			}
-		}
+  public static String osPathChar = "\\";
+
+  // List of all the PoS tags
+  public static String[] allPOS = { "CC", "CD", "DT", "EX", "FW", "IN", "JJ",
+      "JJR", "JJS", "LS", "MD", "NN", "NNS", "NNP", "NNPS", "PDT", "POS",
+      "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO", "UH", "VB", "VBD",
+      "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB" };
+
+  // List of the PoS tags of which the senses are to be extracted
+  public static String[] relevantPOS = { "JJ", "JJR", "JJS", "NN", "NNS", "RB",
+      "RBR", "RBS", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ" };
+
+  // List of Negation Words
+  public static ArrayList<String> negationWords = new ArrayList<String>(
+      Arrays.asList("not", "no", "never", "none", "nor", "non"));
+
+  // List of Stop Words
+  public static ArrayList<String> stopWords = new ArrayList<String>(
+      Arrays.asList("a", "able", "about", "above", "according", "accordingly",
+          "across", "actually", "after", "afterwards", "again", "against",
+          "ain't", "all", "allow", "allows", "almost", "alone", "along",
+          "already", "also", "although", "always", "am", "among", "amongst",
+          "an", "and", "another", "any", "anybody", "anyhow", "anyone",
+          "anything", "anyway", "anyways", "anywhere", "apart", "appear",
+          "appreciate", "appropriate", "are", "aren't", "around", "as",
+          "aside", "ask", "asking", "associated", "at", "available", "away",
+          "awfully", "be", "became", "because", "become", "becomes",
+          "becoming", "been", "before", "beforehand", "behind", "being",
+          "believe", "below", "beside", "besides", "best", "better", "between",
+          "beyond", "both", "brief", "but", "by", "came", "can", "cannot",
+          "cant", "can't", "cause", "causes", "certain", "certainly",
+          "changes", "clearly", "c'mon", "co", "com", "come", "comes",
+          "concerning", "consequently", "consider", "considering", "contain",
+          "containing", "contains", "corresponding", "could", "couldn't",
+          "course", "c's", "currently", "definitely", "described", "despite",
+          "did", "didn't", "different", "do", "does", "doesn't", "doing",
+          "done", "don't", "down", "downwards", "during", "each", "edu", "eg",
+          "eight", "either", "else", "elsewhere", "enough", "entirely",
+          "especially", "et", "etc", "even", "ever", "every", "everybody",
+          "everyone", "everything", "everywhere", "ex", "exactly", "example",
+          "except", "far", "few", "fifth", "first", "five", "followed",
+          "following", "follows", "for", "former", "formerly", "forth", "four",
+          "from", "further", "furthermore", "get", "gets", "getting", "given",
+          "gives", "go", "goes", "going", "gone", "got", "gotten", "greetings",
+          "had", "hadn't", "happens", "hardly", "has", "hasn't", "have",
+          "haven't", "having", "he", "hello", "help", "hence", "her", "here",
+          "hereafter", "hereby", "herein", "here's", "hereupon", "hers",
+          "herself", "he's", "hi", "him", "himself", "his", "hither",
+          "hopefully", "how", "howbeit", "however", "i", "i'd", "ie", "if",
+          "ignored", "i'll", "i'm", "immediate", "in", "inasmuch", "inc",
+          "indeed", "indicate", "indicated", "indicates", "inner", "insofar",
+          "instead", "into", "inward", "is", "isn't", "it", "it'd", "it'll",
+          "its", "it's", "itself", "i've", "just", "keep", "keeps", "kept",
+          "know", "known", "knows", "last", "lately", "later", "latter",
+          "latterly", "least", "less", "lest", "let", "let's", "like", "liked",
+          "likely", "little", "look", "looking", "looks", "ltd", "mainly",
+          "many", "may", "maybe", "me", "mean", "meanwhile", "merely", "might",
+          "more", "moreover", "most", "mostly", "much", "must", "my", "myself",
+          "name", "namely", "nd", "near", "nearly", "necessary", "need",
+          "needs", "neither", "never", "nevertheless", "new", "next", "nine",
+          "no", "nobody", "non", "none", "noone", "nor", "normally", "not",
+          "nothing", "novel", "now", "nowhere", "obviously", "of", "off",
+          "often", "oh", "ok", "okay", "old", "on", "once", "one", "ones",
+          "only", "onto", "or", "other", "others", "otherwise", "ought", "our",
+          "ours", "ourselves", "out", "outside", "over", "overall", "own",
+          "particular", "particularly", "per", "perhaps", "placed", "please",
+          "plus", "possible", "presumably", "probably", "provides", "que",
+          "quite", "qv", "rather", "rd", "re", "really", "reasonably",
+          "regarding", "regardless", "regards", "relatively", "respectively",
+          "right", "said", "same", "saw", "say", "saying", "says", "second",
+          "secondly", "see", "seeing", "seem", "seemed", "seeming", "seems",
+          "seen", "self", "selves", "sensible", "sent", "serious", "seriously",
+          "seven", "several", "shall", "she", "should", "shouldn't", "since",
+          "six", "so", "some", "somebody", "somehow", "someone", "something",
+          "sometime", "sometimes", "somewhat", "somewhere", "soon", "sorry",
+          "specified", "specify", "specifying", "still", "sub", "such", "sup",
+          "sure", "take", "taken", "tell", "tends", "th", "than", "thank",
+          "thanks", "thanx", "that", "thats", "that's", "the", "their",
+          "theirs", "them", "themselves", "then", "thence", "there",
+          "thereafter", "thereby", "therefore", "therein", "theres", "there's",
+          "thereupon", "these", "they", "they'd", "they'll", "they're",
+          "they've", "think", "third", "this", "thorough", "thoroughly",
+          "those", "though", "three", "through", "throughout", "thru", "thus",
+          "to", "together", "too", "took", "toward", "towards", "tried",
+          "tries", "truly", "try", "trying", "t's", "twice", "two", "un",
+          "under", "unfortunately", "unless", "unlikely", "until", "unto",
+          "up", "upon", "us", "use", "used", "useful", "uses", "using",
+          "usually", "value", "various", "very", "via", "viz", "vs", "want",
+          "wants", "was", "wasn't", "way", "we", "we'd", "welcome", "well",
+          "we'll", "went", "were", "we're", "weren't", "we've", "what",
+          "whatever", "what's", "when", "whence", "whenever", "where",
+          "whereafter", "whereas", "whereby", "wherein", "where's",
+          "whereupon", "wherever", "whether", "which", "while", "whither",
+          "who", "whoever", "whole", "whom", "who's", "whose", "why", "will",
+          "willing", "wish", "with", "within", "without", "wonder", "won't",
+          "would", "wouldn't", "yes", "yet", "you", "you'd", "you'll", "your",
+          "you're", "yours", "yourself", "yourselves", "you've", "zero"));
+
+  // Print a text in the console
+  public static void print(Object in) {
+    System.out.println(in);
+  }
+
+  public static void print(Object[] array) {
+    System.out.println(Arrays.asList(array));
+  }
+
+  public static void print(Object[][] array) {
+    System.out.print("[");
+    for (int i = 0; i < array.length; i++) {
+      print(array[i]);
+      if (i != array.length - 1) {
+        System.out.print("\n");
+      }
+      print("]");
+    }
+  }
+
+  // return the PoS (Class POS) out of the PoS-tag
+  public static POS getPOS(String posTag) {
+
+    ArrayList<String> adjective = new ArrayList<String>(Arrays.asList("JJ",
+        "JJR", "JJS"));
+    ArrayList<String> adverb = new ArrayList<String>(Arrays.asList("RB", "RBR",
+        "RBS"));
+    ArrayList<String> noun = new ArrayList<String>(Arrays.asList("NN", "NNS",
+        "NNP", "NNPS"));
+    ArrayList<String> verb = new ArrayList<String>(Arrays.asList("VB", "VBD",
+        "VBG", "VBN", "VBP", "VBZ"));
+
+    if (adjective.contains(posTag))
+      return POS.ADJECTIVE;
+    else if (adverb.contains(posTag))
+      return POS.ADVERB;
+    else if (noun.contains(posTag))
+      return POS.NOUN;
+    else if (verb.contains(posTag))
+      return POS.VERB;
+    else
+      return null;
+
+  }
+
+  // Check whether a list of arrays contains an array
+  public static boolean belongsTo(String[] array, ArrayList<String[]> fullList) {
+    for (String[] refArray : fullList) {
+      if (areStringArraysEqual(array, refArray))
+        return true;
+    }
+    return false;
+  }
+
+  // Check whether two arrays of strings are equal
+  public static boolean areStringArraysEqual(String[] array1, String[] array2) {
+
+    if (array1.equals(null) || array2.equals(null))
+      return false;
+
+    if (array1.length != array2.length) {
+      return false;
+    }
+    for (int i = 0; i < array1.length; i++) {
+      if (!array1[i].equals(array2[i])) {
+        return false;
+      }
+    }
+
+    return true;
 
-		return true;
+  }
 
-	}
-	
 }

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java Thu Jun 25 09:20:30 2015
@@ -1,4 +1,5 @@
 package opennlp.tools.disambiguator;
+
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
@@ -11,7 +12,6 @@ import net.sf.extjwnl.dictionary.Diction
 import net.sf.extjwnl.dictionary.MorphologicalProcessor;
 import opennlp.tools.cmdline.postag.POSModelLoader;
 import opennlp.tools.lemmatizer.SimpleLemmatizer;
-import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.namefind.NameFinderME;
 import opennlp.tools.namefind.TokenNameFinderModel;
 import opennlp.tools.postag.POSModel;
@@ -25,203 +25,203 @@ import opennlp.tools.util.InvalidFormatE
 
 public class Loader {
 
-	private static String modelsDir = "src\\test\\resources\\opennlp\\tools\\disambiguator\\";
-	
-	private static SentenceDetectorME sdetector;
-	private static Tokenizer tokenizer;
-	private static POSTaggerME tagger;
-	private static NameFinderME nameFinder;
-	private static SimpleLemmatizer lemmatizer;
-
-	private static Dictionary dictionary;
-	private static MorphologicalProcessor morph;
-	private static boolean IsInitialized = false;  
-
-	// local caches for faster lookup
-	private static HashMap<String,Object> stemCache;
-	private static HashMap<String,Object> stopCache;
-	private static HashMap<String,Object> relvCache;
-	
-	
-
-	// Constructor
-	public Loader(){
-		super();
-		load();
-	}
-
-	public static HashMap<String,Object> getRelvCache(){
-		if (relvCache==null || relvCache.keySet().isEmpty()){
-			relvCache = new HashMap<String, Object>();
-			for (String t : Constants.relevantPOS){
-				relvCache.put(t, null);
-			}
-		}
-		return relvCache;
-	}
-	
-	public static HashMap<String,Object> getStopCache(){
-		if (stopCache==null || stopCache.keySet().isEmpty()){
-			stopCache = new HashMap<String, Object>();
-			for (String s : Constants.stopWords){
-				stopCache.put(s, null);
-			}
-		}
-		return stopCache;
-	}
-	
-	public static HashMap<String,Object> getStemCache(){
-		if (stemCache==null || stemCache.keySet().isEmpty()){
-			stemCache = new HashMap<String,Object>();
-			for (Object pos : POS.getAllPOS()){
-				stemCache.put(((POS)pos).getKey(),new HashMap());
-			}
-		}
-		return stemCache;
-	}
-	
-	public static MorphologicalProcessor getMorph(){
-		if (morph==null){
-			morph 		= dictionary.getMorphologicalProcessor();
-		}
-		return morph;
-	}
-
-	public static Dictionary getDictionary(){
-		if (dictionary==null){
-			try {
-				dictionary 	= Dictionary.getDefaultResourceInstance();
-			} catch (JWNLException e) {
-				e.printStackTrace();
-			}
-		}
-		return dictionary;
-	}
-
-	public static SimpleLemmatizer getLemmatizer(){
-		if (lemmatizer==null){
-			try {
-				lemmatizer = new SimpleLemmatizer (new FileInputStream(modelsDir + "en-lemmatizer.dict"));
-			} catch (IOException e) {
-				e.printStackTrace();
-			}
-		}
-		
-		return lemmatizer;
-	}
-
-	public static NameFinderME getNameFinder(){
-		if (nameFinder==null){
-			TokenNameFinderModel nameFinderModel;
-			try {
-				nameFinderModel = new TokenNameFinderModel(new FileInputStream(modelsDir + "en-ner-person.bin"));
-				nameFinder = new NameFinderME(nameFinderModel);
-			} catch (IOException e) {
-				e.printStackTrace();
-			}
-		}
-		return nameFinder;
-	}
-
-	public static POSTaggerME getTagger(){
-		if (tagger==null){
-			POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir + "en-pos-maxent.bin"));
-			tagger = new POSTaggerME(posTaggerModel);
-		}
-		return tagger;
-	}
-
-	public static SentenceDetectorME getSDetector(){
-		if (sdetector==null){
-			try {
-				SentenceModel enSentModel = new SentenceModel(new FileInputStream(modelsDir + "en-sent.bin"));
-				sdetector = new SentenceDetectorME(enSentModel);
-			} catch (IOException e) {
-				e.printStackTrace();
-			}
-		}
-		return sdetector;
-	}
-
-	public static Tokenizer getTokenizer(){
-		if (tokenizer == null){
-			try {
-				TokenizerModel  tokenizerModel = new TokenizerModel(new FileInputStream(modelsDir + "en-token.bin"));
-				tokenizer = new TokenizerME(tokenizerModel);
-			} catch (IOException e) {
-				e.printStackTrace();
-			}
-
-		}
-		return tokenizer;
-	}
-
-	public static boolean isInitialized(){
-		return (dictionary !=null
-				&& morph		!=null 
-				&& stemCache	!=null 
-				&& stopCache	!=null
-				&& relvCache 	!=null);
-	}
-	
-	public void load(){
-		try {
-			SentenceModel enSentModel = new SentenceModel(new FileInputStream(modelsDir + "en-sent.bin"));
-			sdetector = new SentenceDetectorME(enSentModel);
-
-			TokenizerModel TokenizerModel = new TokenizerModel(new FileInputStream(modelsDir + "en-token.bin"));
-			tokenizer = new TokenizerME(TokenizerModel);
-
-
-			POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir + "en-pos-maxent.bin"));
-			tagger = new POSTaggerME(posTaggerModel);
-
-			TokenNameFinderModel nameFinderModel = new TokenNameFinderModel(new FileInputStream(modelsDir + "en-ner-person.bin"));
-			nameFinder = new NameFinderME(nameFinderModel);
-
-			lemmatizer = new SimpleLemmatizer (new FileInputStream(modelsDir + "en-lemmatizer.dict"));
-
-			dictionary 	= Dictionary.getDefaultResourceInstance();
-			morph 		= dictionary.getMorphologicalProcessor();
-
-			// loading lookup caches 
-			stemCache = new HashMap();
-			for (Object pos : POS.getAllPOS()){
-				stemCache.put(((POS)pos).getKey(),new HashMap());
-			}
-
-			stopCache = new HashMap<String, Object>();
-			for (String s : Constants.stopWords){
-				stopCache.put(s, null);
-			}
-			relvCache = new HashMap<String, Object>();
-			for (String t : Constants.relevantPOS){
-				relvCache.put(t, null);
-			}
-
-
-			if (isInitialized()){
-				Constants.print("loading was succesfull");
-			}else{
-				Constants.print("loading was unsuccesfull");
-			}
-
-		} catch (FileNotFoundException e) {
-			e.printStackTrace();
-		} catch (InvalidFormatException e) {
-			e.printStackTrace();
-		} catch (IOException e) {
-			e.printStackTrace();
-		} catch (JWNLException e) {
-			e.printStackTrace();
-		}
-	}
-
-	public static void unload ()
-	{ 
-		dictionary.close();
-	}
-
+  private static String modelsDir = "src\\test\\resources\\opennlp\\tools\\disambiguator\\";
 
+  private static SentenceDetectorME sdetector;
+  private static Tokenizer tokenizer;
+  private static POSTaggerME tagger;
+  private static NameFinderME nameFinder;
+  private static SimpleLemmatizer lemmatizer;
+
+  private static Dictionary dictionary;
+  private static MorphologicalProcessor morph;
+  private static boolean IsInitialized = false;
+
+  // local caches for faster lookup
+  private static HashMap<String, Object> stemCache;
+  private static HashMap<String, Object> stopCache;
+  private static HashMap<String, Object> relvCache;
+
+  // Constructor
+  public Loader() {
+    super();
+    load();
+  }
+
+  public static HashMap<String, Object> getRelvCache() {
+    if (relvCache == null || relvCache.keySet().isEmpty()) {
+      relvCache = new HashMap<String, Object>();
+      for (String t : Constants.relevantPOS) {
+        relvCache.put(t, null);
+      }
+    }
+    return relvCache;
+  }
+
+  public static HashMap<String, Object> getStopCache() {
+    if (stopCache == null || stopCache.keySet().isEmpty()) {
+      stopCache = new HashMap<String, Object>();
+      for (String s : Constants.stopWords) {
+        stopCache.put(s, null);
+      }
+    }
+    return stopCache;
+  }
+
+  public static HashMap<String, Object> getStemCache() {
+    if (stemCache == null || stemCache.keySet().isEmpty()) {
+      stemCache = new HashMap<String, Object>();
+      for (Object pos : POS.getAllPOS()) {
+        stemCache.put(((POS) pos).getKey(), new HashMap());
+      }
+    }
+    return stemCache;
+  }
+
+  public static MorphologicalProcessor getMorph() {
+    if (morph == null) {
+      morph = dictionary.getMorphologicalProcessor();
+    }
+    return morph;
+  }
+
+  public static Dictionary getDictionary() {
+    if (dictionary == null) {
+      try {
+        dictionary = Dictionary.getDefaultResourceInstance();
+      } catch (JWNLException e) {
+        e.printStackTrace();
+      }
+    }
+    return dictionary;
+  }
+
+  public static SimpleLemmatizer getLemmatizer() {
+    if (lemmatizer == null) {
+      try {
+        lemmatizer = new SimpleLemmatizer(new FileInputStream(modelsDir
+            + "en-lemmatizer.dict"));
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+    }
+
+    return lemmatizer;
+  }
+
+  public static NameFinderME getNameFinder() {
+    if (nameFinder == null) {
+      TokenNameFinderModel nameFinderModel;
+      try {
+        nameFinderModel = new TokenNameFinderModel(new FileInputStream(
+            modelsDir + "en-ner-person.bin"));
+        nameFinder = new NameFinderME(nameFinderModel);
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+    }
+    return nameFinder;
+  }
+
+  public static POSTaggerME getTagger() {
+    if (tagger == null) {
+      POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir
+          + "en-pos-maxent.bin"));
+      tagger = new POSTaggerME(posTaggerModel);
+    }
+    return tagger;
+  }
+
+  public static SentenceDetectorME getSDetector() {
+    if (sdetector == null) {
+      try {
+        SentenceModel enSentModel = new SentenceModel(new FileInputStream(
+            modelsDir + "en-sent.bin"));
+        sdetector = new SentenceDetectorME(enSentModel);
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+    }
+    return sdetector;
+  }
+
+  public static Tokenizer getTokenizer() {
+    if (tokenizer == null) {
+      try {
+        TokenizerModel tokenizerModel = new TokenizerModel(new FileInputStream(
+            modelsDir + "en-token.bin"));
+        tokenizer = new TokenizerME(tokenizerModel);
+      } catch (IOException e) {
+        e.printStackTrace();
+      }
+
+    }
+    return tokenizer;
+  }
+
+  public static boolean isInitialized() {
+    return (dictionary != null && morph != null && stemCache != null
+        && stopCache != null && relvCache != null);
+  }
+
+  public void load() {
+    try {
+      SentenceModel enSentModel = new SentenceModel(new FileInputStream(
+          modelsDir + "en-sent.bin"));
+      sdetector = new SentenceDetectorME(enSentModel);
+
+      TokenizerModel TokenizerModel = new TokenizerModel(new FileInputStream(
+          modelsDir + "en-token.bin"));
+      tokenizer = new TokenizerME(TokenizerModel);
+
+      POSModel posTaggerModel = new POSModelLoader().load(new File(modelsDir
+          + "en-pos-maxent.bin"));
+      tagger = new POSTaggerME(posTaggerModel);
+
+      TokenNameFinderModel nameFinderModel = new TokenNameFinderModel(
+          new FileInputStream(modelsDir + "en-ner-person.bin"));
+      nameFinder = new NameFinderME(nameFinderModel);
+
+      lemmatizer = new SimpleLemmatizer(new FileInputStream(modelsDir
+          + "en-lemmatizer.dict"));
+
+      dictionary = Dictionary.getDefaultResourceInstance();
+      morph = dictionary.getMorphologicalProcessor();
+
+      // loading lookup caches
+      stemCache = new HashMap();
+      for (Object pos : POS.getAllPOS()) {
+        stemCache.put(((POS) pos).getKey(), new HashMap());
+      }
+
+      stopCache = new HashMap<String, Object>();
+      for (String s : Constants.stopWords) {
+        stopCache.put(s, null);
+      }
+      relvCache = new HashMap<String, Object>();
+      for (String t : Constants.relevantPOS) {
+        relvCache.put(t, null);
+      }
+
+      if (isInitialized()) {
+        Constants.print("loading was succesfull");
+      } else {
+        Constants.print("loading was unsuccesfull");
+      }
+
+    } catch (FileNotFoundException e) {
+      e.printStackTrace();
+    } catch (InvalidFormatException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    }
+  }
+
+  public static void unload() {
+    dictionary.close();
+  }
 
 }

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java Thu Jun 25 09:20:30 2015
@@ -9,8 +9,6 @@ import net.sf.extjwnl.data.Word;
 import net.sf.extjwnl.data.list.PointerTargetNode;
 import net.sf.extjwnl.data.list.PointerTargetNodeList;
 
-
-
 /**
  * Convenience class to access some features.
  */
@@ -19,44 +17,42 @@ public class Node {
 
   public Synset parent;
   public Synset synset;
-  
+
   protected ArrayList<WordPOS> senseRelevantWords;
 
   public ArrayList<Synset> hypernyms = new ArrayList<Synset>();
   public ArrayList<Synset> hyponyms = new ArrayList<Synset>();
   public ArrayList<Synset> meronyms = new ArrayList<Synset>();
   public ArrayList<Synset> holonyms = new ArrayList<Synset>();
-  
+
   public ArrayList<WordPOS> synonyms = new ArrayList<WordPOS>();
-  
-  
-  public Node(Synset parent, Synset synSet, ArrayList<WordPOS> senseRelevantWords) {
-	    this.parent = parent;
-	    this.synset = synSet;
-	    this.senseRelevantWords = senseRelevantWords;
-	  }
-	  
+
+  public Node(Synset parent, Synset synSet,
+      ArrayList<WordPOS> senseRelevantWords) {
+    this.parent = parent;
+    this.synset = synSet;
+    this.senseRelevantWords = senseRelevantWords;
+  }
+
   public Node(Synset synSet, ArrayList<WordPOS> senseRelevantWords) {
-		    this.synset = synSet;
-		    this.senseRelevantWords = senseRelevantWords;
-	    }
-  
-  
-	public ArrayList<WordPOS> getSenseRelevantWords() {
-		return senseRelevantWords;
-	}
-
-	public void setSenseRelevantWords(ArrayList<WordPOS> senseRelevantWords) {
-		this.senseRelevantWords = senseRelevantWords;
-	}
-	  
+    this.synset = synSet;
+    this.senseRelevantWords = senseRelevantWords;
+  }
+
+  public ArrayList<WordPOS> getSenseRelevantWords() {
+    return senseRelevantWords;
+  }
+
+  public void setSenseRelevantWords(ArrayList<WordPOS> senseRelevantWords) {
+    this.senseRelevantWords = senseRelevantWords;
+  }
+
   public String getSense() {
     return this.synset.getGloss().toString();
   }
 
-  
   public void setHypernyms() {
-  //  PointerUtils pointerUtils = PointerUtils.get();
+    // PointerUtils pointerUtils = PointerUtils.get();
     PointerTargetNodeList phypernyms = new PointerTargetNodeList();
     try {
       phypernyms = PointerUtils.getDirectHypernyms(this.synset);
@@ -75,10 +71,10 @@ public class Node {
   }
 
   public void setMeronyms() {
-    //PointerUtils pointerUtils = PointerUtils.getInstance();
+    // PointerUtils pointerUtils = PointerUtils.getInstance();
     PointerTargetNodeList pmeronyms = new PointerTargetNodeList();
     try {
-    	pmeronyms = PointerUtils.getMeronyms(this.synset);
+      pmeronyms = PointerUtils.getMeronyms(this.synset);
     } catch (JWNLException e) {
       e.printStackTrace();
     } catch (NullPointerException e) {
@@ -91,67 +87,66 @@ public class Node {
       this.meronyms.add(ptn.getSynset());
     }
   }
-  
+
   public void setHolonyms() {
-	   // PointerUtils pointerUtils = PointerUtils.getInstance();
-	    PointerTargetNodeList pholonyms = new PointerTargetNodeList();
-	    try {
-	    	pholonyms = PointerUtils.getHolonyms(this.synset);
-	    } catch (JWNLException e) {
-	      e.printStackTrace();
-	    } catch (NullPointerException e) {
-	      System.err.println("Error finding the  holonyms");
-	      e.printStackTrace();
-	    }
-
-	    for (int i = 0; i < pholonyms.size(); i++) {
-	      PointerTargetNode ptn = (PointerTargetNode) pholonyms.get(i);
-	      this.holonyms.add(ptn.getSynset());
-	    }
+    // PointerUtils pointerUtils = PointerUtils.getInstance();
+    PointerTargetNodeList pholonyms = new PointerTargetNodeList();
+    try {
+      pholonyms = PointerUtils.getHolonyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  holonyms");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < pholonyms.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) pholonyms.get(i);
+      this.holonyms.add(ptn.getSynset());
+    }
+
+  }
 
-	  }
-  
   public void setHyponyms() {
-	  //  PointerUtils pointerUtils = PointerUtils.getInstance();
-	    PointerTargetNodeList phyponyms = new PointerTargetNodeList();
-	    try {
-	      phyponyms = PointerUtils.getDirectHyponyms(this.synset);
-	    } catch (JWNLException e) {
-	      e.printStackTrace();
-	    } catch (NullPointerException e) {
-	      System.err.println("Error finding the  hyponyms");
-	      e.printStackTrace();
-	    }
-
-	    for (int i = 0; i < phyponyms.size(); i++) {
-	      PointerTargetNode ptn = (PointerTargetNode) phyponyms.get(i);
-	      this.hyponyms.add(ptn.getSynset());
-	    }
-	  }
-  
-  public void setSynonyms()
-  {
+    // PointerUtils pointerUtils = PointerUtils.getInstance();
+    PointerTargetNodeList phyponyms = new PointerTargetNodeList();
+    try {
+      phyponyms = PointerUtils.getDirectHyponyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  hyponyms");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < phyponyms.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) phyponyms.get(i);
+      this.hyponyms.add(ptn.getSynset());
+    }
+  }
+
+  public void setSynonyms() {
     for (Word word : synset.getWords())
-      synonyms.add(new WordPOS(word.toString(),word.getPOS()));
+      synonyms.add(new WordPOS(word.toString(), word.getPOS()));
   }
-  
+
   public ArrayList<Synset> getHypernyms() {
-	  return hypernyms;
+    return hypernyms;
   }
-  
+
   public ArrayList<Synset> getHyponyms() {
-	  return hyponyms;
+    return hyponyms;
   }
-  
+
   public ArrayList<Synset> getMeronyms() {
-	  return meronyms;
+    return meronyms;
   }
+
   public ArrayList<Synset> getHolonyms() {
-	  return holonyms;
+    return holonyms;
   }
 
-  public ArrayList<WordPOS> getSynonyms()
-  {
+  public ArrayList<WordPOS> getSynonyms() {
     return synonyms;
   }
 

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java Thu Jun 25 09:20:30 2015
@@ -5,159 +5,156 @@ import java.util.HashMap;
 import java.util.List;
 
 import net.sf.extjwnl.JWNLException;
-import net.sf.extjwnl.data.IndexWord;
 import net.sf.extjwnl.data.POS;
 import opennlp.tools.util.Span;
 
-
-
 public class PreProcessor {
 
-	public PreProcessor() {
-		super();
-	}
-
-	public static String[] split(String text) {
-		return Loader.getSDetector().sentDetect(text);
-	}
-
-	public static String[] tokenize(String sentence) {
-		return Loader.getTokenizer().tokenize(sentence);		
-	}
-
-	public static String[] tag(String[] tokenizedSentence) {
-		return Loader.getTagger().tag(tokenizedSentence);
-	}
-
-	public static String lemmatize(String word, String posTag) {
-		return Loader.getLemmatizer().lemmatize(word, posTag);
-	}
-
-	public static boolean isName(String word) {
-		Span nameSpans[] = Loader.getNameFinder().find(new String[] { word });
-		return (nameSpans.length != 0);
-	}
-
-	public static ArrayList<WordPOS> getAllRelevantWords(String[]  sentence) {
-
-		ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
-
-		String[] tags = tag(sentence);
-
-		for (int i = 0; i<sentence.length; i++) {
-			if (!Loader.getStopCache().containsKey(sentence[i])) {
-				if (Loader.getRelvCache().containsKey(tags[i])) {
-					relevantWords.add(new WordPOS(sentence[i],Constants.getPOS(tags[i])));	
-				}
-
-			}
-		}
-		return relevantWords;
-	}
-
-
-	public static ArrayList<WordPOS> getAllRelevantWords(WordToDisambiguate word) {
-		return getAllRelevantWords(word.getSentence());
-	}
-
-
-	public static ArrayList<WordPOS> getRelevantWords(WordToDisambiguate word, int winBackward, int winForward) {
-
-		ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
-
-		String[] sentence = word.getSentence();
-		String[] tags = tag(sentence);
-
-		int index = word.getWordIndex();
-
-		for (int i = index - winBackward; i<=index + winForward; i++) {
-
-			if (i >= 0 && i < sentence.length && i != index) {
-				if (!Loader.getStopCache().containsKey(sentence[i])) {
-
-					if (Loader.getRelvCache().containsKey(tags[i])) {
-						relevantWords.add(new WordPOS(sentence[i],Constants.getPOS(tags[i])));	
-					}
-
-				}
-			}
-		}
-		return relevantWords;
-	}
-
-	
-	/**
-	 * Stem a single word with WordNet dictionnary
-	 * 
-	 * @param wordToStem
-	 *            word to be stemmed
-	 * @return stemmed list of words
-	 */
-	public static List StemWordWithWordNet(WordPOS wordToStem) {
-		if (!Loader.isInitialized() 
-				|| wordToStem == null)
-			return null;
-		ArrayList<String> stems = new ArrayList();
-		try {
-			for (Object pos : POS.getAllPOS()){
-				stems.addAll(Loader.getMorph().lookupAllBaseForms((POS)pos, wordToStem.getWord())) ;
-			}
-			
-			if (stems.size()>0)
-				return stems;
-			else{
-				return null;
-			}
-			
-		} catch (JWNLException e) {
-			e.printStackTrace();
-		}
-		return null;
-	}
-
-	/**
-	 * Stem a single word tries to look up the word in the stemCache HashMap If
-	 * the word is not found it is stemmed with WordNet and put into stemCache
-	 * 
-	 * @param wordToStem
-	 *            word to be stemmed
-	 * @return stemmed word list, null means the word is incorrect 
-	 */
-	public static List Stem(WordPOS wordToStem) {
-		
-		// check if we already cached the stem map
-		HashMap posMap 	 	= (HashMap) Loader.getStemCache().get(wordToStem.getPOS().getKey());
-		
-		// don't check words with digits in them
-		if (containsNumbers(wordToStem.getWord())){
-			return null;
-		}
-						
-		List stemList = (List) posMap.get(wordToStem.getWord());
-		if (stemList != null){ // return it if we already cached it
-			return stemList;
-			
-		} else { // unCached list try to stem it
-			stemList = StemWordWithWordNet(wordToStem);
-			if (stemList != null) {
-				// word was recognized and stemmed with wordnet:
-				// add it to cache and return the stemmed list
-				posMap.put(wordToStem.getWord(),stemList);
-				Loader.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
-				return stemList;
-			}else{ // could not be stemmed add it anyway (as incorrect with null list)
-				posMap.put(wordToStem.getWord(), null);
-				Loader.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
-				return null;
-			}
-		}
-	}
-	
-	public static boolean containsNumbers(String word) {
-		// checks if the word is or contains a number
-		return word.matches(".*[0-9].*");
-	}
-	
-	
-	
+  public PreProcessor() {
+    super();
+  }
+
+  public static String[] split(String text) {
+    return Loader.getSDetector().sentDetect(text);
+  }
+
+  public static String[] tokenize(String sentence) {
+    return Loader.getTokenizer().tokenize(sentence);
+  }
+
+  public static String[] tag(String[] tokenizedSentence) {
+    return Loader.getTagger().tag(tokenizedSentence);
+  }
+
+  public static String lemmatize(String word, String posTag) {
+    return Loader.getLemmatizer().lemmatize(word, posTag);
+  }
+
+  public static boolean isName(String word) {
+    Span nameSpans[] = Loader.getNameFinder().find(new String[] { word });
+    return (nameSpans.length != 0);
+  }
+
+  public static ArrayList<WordPOS> getAllRelevantWords(String[] sentence) {
+
+    ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
+
+    String[] tags = tag(sentence);
+
+    for (int i = 0; i < sentence.length; i++) {
+      if (!Loader.getStopCache().containsKey(sentence[i])) {
+        if (Loader.getRelvCache().containsKey(tags[i])) {
+          relevantWords
+              .add(new WordPOS(sentence[i], Constants.getPOS(tags[i])));
+        }
+
+      }
+    }
+    return relevantWords;
+  }
+
+  public static ArrayList<WordPOS> getAllRelevantWords(WordToDisambiguate word) {
+    return getAllRelevantWords(word.getSentence());
+  }
+
+  public static ArrayList<WordPOS> getRelevantWords(WordToDisambiguate word,
+      int winBackward, int winForward) {
+
+    ArrayList<WordPOS> relevantWords = new ArrayList<WordPOS>();
+
+    String[] sentence = word.getSentence();
+    String[] tags = tag(sentence);
+
+    int index = word.getWordIndex();
+
+    for (int i = index - winBackward; i <= index + winForward; i++) {
+
+      if (i >= 0 && i < sentence.length && i != index) {
+        if (!Loader.getStopCache().containsKey(sentence[i])) {
+
+          if (Loader.getRelvCache().containsKey(tags[i])) {
+            relevantWords.add(new WordPOS(sentence[i], Constants
+                .getPOS(tags[i])));
+          }
+
+        }
+      }
+    }
+    return relevantWords;
+  }
+
+  /**
+   * Stem a single word with WordNet dictionnary
+   * 
+   * @param wordToStem
+   *          word to be stemmed
+   * @return stemmed list of words
+   */
+  public static List StemWordWithWordNet(WordPOS wordToStem) {
+    if (!Loader.isInitialized() || wordToStem == null)
+      return null;
+    ArrayList<String> stems = new ArrayList();
+    try {
+      for (Object pos : POS.getAllPOS()) {
+        stems.addAll(Loader.getMorph().lookupAllBaseForms((POS) pos,
+            wordToStem.getWord()));
+      }
+
+      if (stems.size() > 0)
+        return stems;
+      else {
+        return null;
+      }
+
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    }
+    return null;
+  }
+
+  /**
+   * Stem a single word tries to look up the word in the stemCache HashMap If
+   * the word is not found it is stemmed with WordNet and put into stemCache
+   * 
+   * @param wordToStem
+   *          word to be stemmed
+   * @return stemmed word list, null means the word is incorrect
+   */
+  public static List Stem(WordPOS wordToStem) {
+
+    // check if we already cached the stem map
+    HashMap posMap = (HashMap) Loader.getStemCache().get(
+        wordToStem.getPOS().getKey());
+
+    // don't check words with digits in them
+    if (containsNumbers(wordToStem.getWord())) {
+      return null;
+    }
+
+    List stemList = (List) posMap.get(wordToStem.getWord());
+    if (stemList != null) { // return it if we already cached it
+      return stemList;
+
+    } else { // unCached list try to stem it
+      stemList = StemWordWithWordNet(wordToStem);
+      if (stemList != null) {
+        // word was recognized and stemmed with wordnet:
+        // add it to cache and return the stemmed list
+        posMap.put(wordToStem.getWord(), stemList);
+        Loader.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
+        return stemList;
+      } else { // could not be stemmed add it anyway (as incorrect with null
+               // list)
+        posMap.put(wordToStem.getWord(), null);
+        Loader.getStemCache().put(wordToStem.getPOS().getKey(), posMap);
+        return null;
+      }
+    }
+  }
+
+  public static boolean containsNumbers(String word) {
+    // checks if the word is or contains a number
+    return word.matches(".*[0-9].*");
+  }
+
 }

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java Thu Jun 25 09:20:30 2015
@@ -6,10 +6,8 @@ import opennlp.tools.util.Span;
  * The interface for word sense disambiguators.
  */
 public interface WSDisambiguator {
-	
-	public String[] disambiguate(String[] inputText,int inputWordIndex); 
-	
-	public String[] disambiguate(String[] inputText, Span[] inputWordSpans);
-	
 
-}
\ No newline at end of file
+  public String[] disambiguate(String[] inputText, int inputWordIndex);
+
+  public String[] disambiguate(String[] inputText, Span[] inputWordSpans);
+}

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java Thu Jun 25 09:20:30 2015
@@ -1,7 +1,6 @@
 package opennlp.tools.disambiguator;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 
@@ -9,86 +8,84 @@ import net.sf.extjwnl.JWNLException;
 import net.sf.extjwnl.data.IndexWord;
 import net.sf.extjwnl.data.POS;
 import net.sf.extjwnl.data.Synset;
-import net.sf.extjwnl.dictionary.Dictionary;
-
 
 public class WordPOS {
 
-	private String word;
-	private List stems;
-	private POS pos;
-
-	// Constructor
-	public WordPOS(String word, POS pos) throws IllegalArgumentException{
-		if (word==null || pos ==null){
-			throw new IllegalArgumentException("Args are null");
-		}
-		this.word = word;
-		this.pos = pos;
-	}
-
-	public String getWord() {
-		return word;
-	}
-
-	public POS getPOS() {
-		return pos;
-	}
-
-	public List getStems() {
-		if (stems==null){
-			return PreProcessor.Stem(this);
-		}else{
-			return stems;
-		}
-	}
-
-
-	// Return the synsets (thus the senses) of the current word
-	public ArrayList<Synset> getSynsets() {
-
-		IndexWord indexWord;
-		try {
-			indexWord = Loader.getDictionary().lookupIndexWord(pos, word);
-			List<Synset> synsets = indexWord.getSenses();
-			return (new ArrayList<Synset>(synsets));
-		} catch (JWNLException e) {
-			e.printStackTrace();
-		}
-		return null;
-	}
-
-	// uses Stemming to check if two words are equivalent
-	public boolean isStemEquivalent(WordPOS wordToCompare) {
-		// check if there is intersection in the stems;
-		List originalList  = this.getStems();
-		List listToCompare = wordToCompare.getStems();
-
-//		Constants.print("+++++++++++++++++++++  ::: "+ this.getWord());
-//		Constants.print("+++++++++++++++++++++  ::: "+ wordToCompare.getWord());
-//		Constants.print("the first list is \n"+originalList.toString());
-//		Constants.print("the second list is \n"+listToCompare.toString());
-
-		if(originalList==null || listToCompare==null){ // any of the two requested words do not exist
-			return false;
-		}else{
-			return !Collections.disjoint(originalList, listToCompare);
-		}
-
-	}
-
-
-	// uses Lemma to check if two words are equivalent
-	public boolean isLemmaEquivalent(WordPOS wordToCompare) {
-		// TODO use lemmatizer to compare with lemmas
- 				
-		ArrayList<String> lemmas_word = new ArrayList();
-		ArrayList<String> lemmas_wordToCompare = new ArrayList();
-		
-		for (String pos : Constants.allPOS){
-			Loader.getLemmatizer().lemmatize(wordToCompare.getWord(), pos);
-		}
-		return false;
-	}
+  private String word;
+  private List stems;
+  private POS pos;
+
+  // Constructor
+  public WordPOS(String word, POS pos) throws IllegalArgumentException {
+    if (word == null || pos == null) {
+      throw new IllegalArgumentException("Args are null");
+    }
+    this.word = word;
+    this.pos = pos;
+  }
+
+  public String getWord() {
+    return word;
+  }
+
+  public POS getPOS() {
+    return pos;
+  }
+
+  public List getStems() {
+    if (stems == null) {
+      return PreProcessor.Stem(this);
+    } else {
+      return stems;
+    }
+  }
+
+  // Return the synsets (thus the senses) of the current word
+  public ArrayList<Synset> getSynsets() {
+
+    IndexWord indexWord;
+    try {
+      indexWord = Loader.getDictionary().lookupIndexWord(pos, word);
+      List<Synset> synsets = indexWord.getSenses();
+      return (new ArrayList<Synset>(synsets));
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    }
+    return null;
+  }
+
+  // uses Stemming to check if two words are equivalent
+  public boolean isStemEquivalent(WordPOS wordToCompare) {
+    // check if there is intersection in the stems;
+    List originalList = this.getStems();
+    List listToCompare = wordToCompare.getStems();
+
+    // Constants.print("+++++++++++++++++++++  ::: "+ this.getWord());
+    // Constants.print("+++++++++++++++++++++  ::: "+ wordToCompare.getWord());
+    // Constants.print("the first list is \n"+originalList.toString());
+    // Constants.print("the second list is \n"+listToCompare.toString());
+
+    if (originalList == null || listToCompare == null) { // any of the two
+                                                         // requested words do
+                                                         // not exist
+      return false;
+    } else {
+      return !Collections.disjoint(originalList, listToCompare);
+    }
+
+  }
+
+  // uses Lemma to check if two words are equivalent
+  public boolean isLemmaEquivalent(WordPOS wordToCompare) {
+    // TODO use lemmatizer to compare with lemmas
+
+    ArrayList<String> lemmas_word = new ArrayList();
+    ArrayList<String> lemmas_wordToCompare = new ArrayList();
+
+    for (String pos : Constants.allPOS) {
+      Loader.getLemmatizer().lemmatize(wordToCompare.getWord(), pos);
+    }
+    return false;
+  }
 
 }

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java Thu Jun 25 09:20:30 2015
@@ -1,75 +1,62 @@
 package opennlp.tools.disambiguator;
 
-import java.util.ArrayList;
-import java.util.concurrent.Semaphore;
-
 import opennlp.tools.disambiguator.lesk.WTDLesk;
 
-public class WordSense implements Comparable{ 
-	
-	protected WTDLesk WTDLesk;
-	protected Node node;
-	protected int id;
-	protected double score;
-	
-	
-	public WordSense(WTDLesk WTDLesk, Node node) {
-		super();
-		this.WTDLesk = WTDLesk;
-		this.node = node;
-	}
-
-	public WordSense() {
-		super();
-	}
-
-	
-	public WTDLesk getWTDLesk() {
-		return WTDLesk;
-	}
-
-	public void setWTDLesk(WTDLesk WTDLesk) {
-		this.WTDLesk = WTDLesk;
-	}
-
-	
-	public Node getNode() {
-		return node;
-	}
-
-	public void setNode(Node node) {
-		this.node = node;
-	}
-
-	
-	public double getScore() {
-		return score;
-	}
-
-	public void setScore(double score) {
-		this.score = score;
-	}
-
-	public int getId() {
-		return id;
-	}
-
-	public void setId(int id) {
-		this.id = id;
-	}
-
-
-	public int compareTo(Object o) {
-		return (this.score-((WordSense)o).score)<0?1:-1;
-	}
-	
-	
-	public String getSense() {
-		return node.getSense();
-	}
-	
+public class WordSense implements Comparable {
 
+  protected WTDLesk WTDLesk;
+  protected Node node;
+  protected int id;
+  protected double score;
+
+  public WordSense(WTDLesk WTDLesk, Node node) {
+    super();
+    this.WTDLesk = WTDLesk;
+    this.node = node;
+  }
+
+  public WordSense() {
+    super();
+  }
+
+  public WTDLesk getWTDLesk() {
+    return WTDLesk;
+  }
+
+  public void setWTDLesk(WTDLesk WTDLesk) {
+    this.WTDLesk = WTDLesk;
+  }
+
+  public Node getNode() {
+    return node;
+  }
+
+  public void setNode(Node node) {
+    this.node = node;
+  }
+
+  public double getScore() {
+    return score;
+  }
+
+  public void setScore(double score) {
+    this.score = score;
+  }
+
+  public int getId() {
+    return id;
+  }
+
+  public void setId(int id) {
+    this.id = id;
+  }
+
+  public int compareTo(Object o) {
+    return (this.score - ((WordSense) o).score) < 0 ? 1 : -1;
+  }
+
+  public String getSense() {
+    return node.getSense();
+  }
 
 }
-
-

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java Thu Jun 25 09:20:30 2015
@@ -1,95 +1,77 @@
 package opennlp.tools.disambiguator;
 
-
-
 public class WordToDisambiguate {
-	
-	protected String [] sentence;
-	protected int wordIndex;
-	protected String posTag;
-
-	protected int sense;
-	
-	
-	
-	/**
-	 * Constructor
-	 */
-	
-	
-	public WordToDisambiguate(String[] sentence, int wordIndex, int sense) throws IllegalArgumentException{
-		super();
-		
-		if (wordIndex>sentence.length){
-			throw new IllegalArgumentException("The index is out of bounds !");
-		}
-		this.sentence = sentence;
-		this.wordIndex = wordIndex;
-		String[] posTags = PreProcessor.tag(sentence);
-		this.posTag = posTags[wordIndex];
-		this.sense = sense;
-	}
-	
-	public WordToDisambiguate(String[] sentence, int wordIndex) {
-		this(sentence,wordIndex,-1);
-	}
-	
-		
-	
-	/**
-	 * Getters and Setters
-	 */
-	
-	
-
-	// sentence
-	public String[] getSentence() {
-		return sentence;
-	}
-
-	public void setSentence(String[] sentence) {
-		this.sentence = sentence;
-	}
-
-	
-	// word
-	public int getWordIndex() {
-		return wordIndex;
-	}
-
-	public void setWordIndex(int wordIndex) {
-		this.wordIndex = wordIndex;
-	}
-	
-	public String getWord(){
-		return sentence[wordIndex];
-	}
-	
-	
-	// posTag
-	public String getPosTag() {
-		return posTag;
-	}
-
-	public void setPosTag(String posTag) {
-		this.posTag = posTag;
-	}
-	
-	
-	// sense
-	public int getSense() {
-		return sense;
-	}
-
-	public void setSense(int sense) {
-		this.sense = sense;
-	}
-
-
-
-	
-	
-
-	
 
+  protected String[] sentence;
+  protected int wordIndex;
+  protected String posTag;
+
+  protected int sense;
+
+  /**
+   * Constructor
+   */
+
+  public WordToDisambiguate(String[] sentence, int wordIndex, int sense)
+      throws IllegalArgumentException {
+    super();
+
+    if (wordIndex > sentence.length) {
+      throw new IllegalArgumentException("The index is out of bounds !");
+    }
+    this.sentence = sentence;
+    this.wordIndex = wordIndex;
+    String[] posTags = PreProcessor.tag(sentence);
+    this.posTag = posTags[wordIndex];
+    this.sense = sense;
+  }
+
+  public WordToDisambiguate(String[] sentence, int wordIndex) {
+    this(sentence, wordIndex, -1);
+  }
+
+  /**
+   * Getters and Setters
+   */
+
+  // sentence
+  public String[] getSentence() {
+    return sentence;
+  }
+
+  public void setSentence(String[] sentence) {
+    this.sentence = sentence;
+  }
+
+  // word
+  public int getWordIndex() {
+    return wordIndex;
+  }
+
+  public void setWordIndex(int wordIndex) {
+    this.wordIndex = wordIndex;
+  }
+
+  public String getWord() {
+    return sentence[wordIndex];
+  }
+
+  // posTag
+  public String getPosTag() {
+    return posTag;
+  }
+
+  public void setPosTag(String posTag) {
+    this.posTag = posTag;
+  }
+
+  // sense
+  public int getSense() {
+    return sense;
+  }
+
+  public void setSense(int sense) {
+    this.sense = sense;
+  }
 }
+

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java Thu Jun 25 09:20:30 2015
@@ -5,108 +5,94 @@ import java.util.ArrayList;
 import opennlp.tools.disambiguator.Constants;
 import opennlp.tools.disambiguator.Loader;
 
-
 public class FeaturesExtractor {
 
-	
-	
-	public FeaturesExtractor() {
-		super();
-	}
-
-
-	/**
-	 * @Algorithm: IMS (It Makes Sense)
-	 * 
-	 * The following methods serve to extract the features for the algorithm IMS.
-	 */
-	
-	public String[] extractPosOfSurroundingWords (String[] sentence, int wordIndex, int numberOfWords) {
-		
-		String[] taggedSentence = Loader.getTagger().tag(sentence);
-		
-		String[] tags = new String[2*numberOfWords+1];
-		
-		int j = 0;
-		
-		for (int i = wordIndex - numberOfWords; i < wordIndex + numberOfWords ; i++) {
-			if (i < 0 || i >= sentence.length) {
-				tags[j] = "null";
-			} else {
-				tags[j] = taggedSentence[i];
-			}
-			j++;
-		}
-		
-		return tags;
-	}
-	
-	
-	public String[] extractSurroundingWords(String[] sentence, int wordIndex) {
-
-		String[] posTags = Loader.getTagger().tag(sentence);
-		
-		Constants.print(posTags);
-		
-		ArrayList<String> contextWords = new ArrayList<String>();
-
-		for (int i = 0; i < sentence.length; i++) {
-
-			if (!Constants.stopWords.contains(sentence[i].toLowerCase())
-					&& (wordIndex != i)) {
-				
-				String word = sentence[i].toLowerCase().replaceAll("[^a-z]", "").trim();
-				
-				if (!word.equals("")) {
-					String lemma = Loader.getLemmatizer().lemmatize(sentence[i], posTags[i]);
-					contextWords.add(lemma);
-				}
-				
-				
-
-
-			}
-		}
-
-		return contextWords.toArray(new String[contextWords.size()]);
-	}
-	
-	
-	public ArrayList<String[]> extractLocalCollocations(String[] sentence, int wordIndex, int range) {
-		/**
-		 * Here the author used only 11 features of this type. the range was set to 3 (bigrams extracted in a way that they are at max separated
-		 * by 1 word).
-		 */
-		
-		ArrayList<String[]> localCollocations = new ArrayList<String[]>();
-		
-		for (int i = wordIndex - range; i <= wordIndex + range ; i++) {
-			
-			if (!(i < 0 || i > sentence.length - 2)) {
-					if ((i != wordIndex) && (i+1 != wordIndex) && (i+1 < wordIndex + range)) {
-						String[] lc =  {sentence[i], sentence[i+1]};
-						localCollocations.add(lc);
-					}
-					if ((i != wordIndex) && (i+2 != wordIndex) && (i+2 < wordIndex + range)) {
-						String[] lc =  {sentence[i], sentence[i+2]};
-						localCollocations.add(lc);
-					}
-			}
-			
-		}
-		
-		return localCollocations;
-	}
-
-	
-	/**
-	 * @Algorithm: SST
-	 * 
-	 * The following methods serve to extract the features for the algorithm SST.
-	 */
-	
-	
-	
-	
-	
+  public FeaturesExtractor() {
+    super();
+  }
+
+  /**
+   * @Algorithm: IMS (It Makes Sense)
+   * 
+   *             The following methods serve to extract the features for the
+   *             algorithm IMS.
+   */
+
+  public String[] extractPosOfSurroundingWords(String[] sentence,
+      int wordIndex, int numberOfWords) {
+
+    String[] taggedSentence = Loader.getTagger().tag(sentence);
+
+    String[] tags = new String[2 * numberOfWords + 1];
+
+    int j = 0;
+
+    for (int i = wordIndex - numberOfWords; i < wordIndex + numberOfWords; i++) {
+      if (i < 0 || i >= sentence.length) {
+        tags[j] = "null";
+      } else {
+        tags[j] = taggedSentence[i];
+      }
+      j++;
+    }
+
+    return tags;
+  }
+
+  public String[] extractSurroundingWords(String[] sentence, int wordIndex) {
+
+    String[] posTags = Loader.getTagger().tag(sentence);
+
+    Constants.print(posTags);
+
+    ArrayList<String> contextWords = new ArrayList<String>();
+
+    for (int i = 0; i < sentence.length; i++) {
+
+      if (!Constants.stopWords.contains(sentence[i].toLowerCase())
+          && (wordIndex != i)) {
+
+        String word = sentence[i].toLowerCase().replaceAll("[^a-z]", "").trim();
+
+        if (!word.equals("")) {
+          String lemma = Loader.getLemmatizer().lemmatize(sentence[i],
+              posTags[i]);
+          contextWords.add(lemma);
+        }
+
+      }
+    }
+
+    return contextWords.toArray(new String[contextWords.size()]);
+  }
+
+  public ArrayList<String[]> extractLocalCollocations(String[] sentence,
+      int wordIndex, int range) {
+    /**
+     * Here the author used only 11 features of this type. the range was set to
+     * 3 (bigrams extracted in a way that they are at max separated by 1 word).
+     */
+
+    ArrayList<String[]> localCollocations = new ArrayList<String[]>();
+
+    for (int i = wordIndex - range; i <= wordIndex + range; i++) {
+
+      if (!(i < 0 || i > sentence.length - 2)) {
+        if ((i != wordIndex) && (i + 1 != wordIndex)
+            && (i + 1 < wordIndex + range)) {
+          String[] lc = { sentence[i], sentence[i + 1] };
+          localCollocations.add(lc);
+        }
+        if ((i != wordIndex) && (i + 2 != wordIndex)
+            && (i + 2 < wordIndex + range)) {
+          String[] lc = { sentence[i], sentence[i + 2] };
+          localCollocations.add(lc);
+        }
+      }
+
+    }
+
+    return localCollocations;
+  }
 }
+

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java Thu Jun 25 09:20:30 2015
@@ -5,95 +5,90 @@ import java.util.ArrayList;
 import opennlp.tools.disambiguator.WSDisambiguator;
 import opennlp.tools.util.Span;
 
-public class IMS implements WSDisambiguator{
-	
-	FeaturesExtractor fExtractor = new FeaturesExtractor();
-	
-	/**
-	 * PARAMETERS
-	 */
-	
-	int numberOfSurroundingWords;
-	int ngram;
-	
-	
-	
-	/**
-	 * Constructors
-	 */
-	
-	public IMS() {
-		super();
-		numberOfSurroundingWords = 3;
-		ngram = 2;
-	}
-	
-	public IMS(int numberOfSurroundingWords, int ngram) {
-		super();
-		this.numberOfSurroundingWords = numberOfSurroundingWords;
-		this.ngram = ngram;
-	}
-	
-	
-	
-	/**
-	 * INTERNAL METHODS
-	 */
-	
-	private void extractFeature(ArrayList<WTDIMS> words) {
-		
-		for (WTDIMS word : words) {
-			
-			word.setPosOfSurroundingWords(fExtractor.extractPosOfSurroundingWords(word.getSentence(), word.getWordIndex(), numberOfSurroundingWords));
-			
-			word.setSurroundingWords(fExtractor.extractSurroundingWords(word.getSentence(), word.getWordIndex()));
-			
-			word.setLocalCollocations(fExtractor.extractLocalCollocations(word.getSentence(), word.getWordIndex(), ngram));
-			
-		}
-
-	}
-	
-	private ArrayList<WTDIMS> extractTrainingData(String xmlFile) {
-		
-		ArrayList<WTDIMS> trainingData = new ArrayList<WTDIMS>();
-		
-		/**
-		 * TODO Processing of the xml File here (To check the format of the data)
-		 */
-		
-		return trainingData;
-	}
-	
-	
-	public void train(String trainingSetFile) { // TODO To revise after finihsing the implementation of the collector
-		
-		ArrayList<WTDIMS> instances = extractTrainingData(trainingSetFile);
-		
-		extractFeature(instances);
-		
-		
-		
-	}
-	
-	
-	public void load (String binFile) {
-		// TODO After finishing training the training data
-				
-	}
-	
-
-	@Override
-	public String[] disambiguate(String[] inputText, int inputWordIndex) {
-		// TODO Auto-generated method stub
-		return null;
-	}
-
-	@Override
-	public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
-		// TODO Auto-generated method stub
-		return null;
-	}
-	
+public class IMS implements WSDisambiguator {
+
+  FeaturesExtractor fExtractor = new FeaturesExtractor();
+
+  /**
+   * PARAMETERS
+   */
+
+  int numberOfSurroundingWords;
+  int ngram;
+
+  /**
+   * Constructors
+   */
+
+  public IMS() {
+    super();
+    numberOfSurroundingWords = 3;
+    ngram = 2;
+  }
+
+  public IMS(int numberOfSurroundingWords, int ngram) {
+    super();
+    this.numberOfSurroundingWords = numberOfSurroundingWords;
+    this.ngram = ngram;
+  }
+
+  /**
+   * INTERNAL METHODS
+   */
+
+  private void extractFeature(ArrayList<WTDIMS> words) {
+
+    for (WTDIMS word : words) {
+
+      word.setPosOfSurroundingWords(fExtractor.extractPosOfSurroundingWords(
+          word.getSentence(), word.getWordIndex(), numberOfSurroundingWords));
+
+      word.setSurroundingWords(fExtractor.extractSurroundingWords(
+          word.getSentence(), word.getWordIndex()));
+
+      word.setLocalCollocations(fExtractor.extractLocalCollocations(
+          word.getSentence(), word.getWordIndex(), ngram));
+
+    }
+
+  }
+
+  private ArrayList<WTDIMS> extractTrainingData(String xmlFile) {
+
+    ArrayList<WTDIMS> trainingData = new ArrayList<WTDIMS>();
+
+    /**
+     * TODO Processing of the xml File here (To check the format of the data)
+     */
+
+    return trainingData;
+  }
+
+  public void train(String trainingSetFile) { // TODO To revise after finihsing
+                                              // the implementation of the
+                                              // collector
+
+    ArrayList<WTDIMS> instances = extractTrainingData(trainingSetFile);
+
+    extractFeature(instances);
+
+  }
+
+  public void load(String binFile) {
+    // TODO After finishing training the training data
+
+  }
+
+  @Override
+  public String[] disambiguate(String[] inputText, int inputWordIndex) {
+    // TODO Auto-generated method stub
+    return null;
+  }
+
+  @Override
+  public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
+    // TODO Auto-generated method stub
+    return null;
+  }
 
 }

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java Thu Jun 25 09:20:30 2015
@@ -1,56 +1,49 @@
 package opennlp.tools.disambiguator.ims;
+
 import java.util.ArrayList;
 
 import opennlp.tools.disambiguator.WordToDisambiguate;
 
-
 public class WTDIMS extends WordToDisambiguate {
-		
-	protected String[] posOfSurroundingWords;
-	protected String[] surroundingWords;
-	protected ArrayList<String[]> localCollocations;
-	
-	
-	
-	/**
-	 * Constructor
-	 */
-	public WTDIMS(String[] sentence, int word, int sense) {
-		super(sentence, word, sense);
-	}
-
-	
-	
-	/**
-	 * Getters and Setters
-	 */
-	
-	
-	public String[] getPosOfSurroundingWords() {
-		return posOfSurroundingWords;
-	}
-
-	public void setPosOfSurroundingWords(String[] posOfSurroundingWords) {
-		this.posOfSurroundingWords = posOfSurroundingWords;
-	}
-	
-
-	public String[] getSurroundingWords() {
-		return surroundingWords;
-	}
-
-	public void setSurroundingWords(String[] surroundingWords) {
-		this.surroundingWords = surroundingWords;
-	}
-
-	
-	public ArrayList<String[]> getLocalCollocations() {
-		return localCollocations;
-	}
-
-	public void setLocalCollocations(ArrayList<String[]> localCollocations) {
-		this.localCollocations = localCollocations;
-	}
-	
-	
+
+  protected String[] posOfSurroundingWords;
+  protected String[] surroundingWords;
+  protected ArrayList<String[]> localCollocations;
+
+  /**
+   * Constructor
+   */
+  public WTDIMS(String[] sentence, int word, int sense) {
+    super(sentence, word, sense);
+  }
+
+  /**
+   * Getters and Setters
+   */
+
+  public String[] getPosOfSurroundingWords() {
+    return posOfSurroundingWords;
+  }
+
+  public void setPosOfSurroundingWords(String[] posOfSurroundingWords) {
+    this.posOfSurroundingWords = posOfSurroundingWords;
+  }
+
+  public String[] getSurroundingWords() {
+    return surroundingWords;
+  }
+
+  public void setSurroundingWords(String[] surroundingWords) {
+    this.surroundingWords = surroundingWords;
+  }
+
+  public ArrayList<String[]> getLocalCollocations() {
+    return localCollocations;
+  }
+
+  public void setLocalCollocations(ArrayList<String[]> localCollocations) {
+    this.localCollocations = localCollocations;
+  }
+
 }
+