You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2015/07/06 09:49:32 UTC
svn commit: r1689330 - in /opennlp/sandbox/opennlp-wsd/src: main/java/opennlp/tools/disambiguator/ main/java/opennlp/tools/disambiguator/ims/ main/java/opennlp/tools/disambiguator/lesk/ test/java/opennlp/tools/disambiguator/

Author: joern
Date: Mon Jul  6 07:49:31 2015
New Revision: 1689330

URL: http://svn.apache.org/r1689330
Log:
OPENNLP-790 First iteration of the evaluator, testing on basic lesk, will need to validate and check the different performances. Thanks to Anthony Beylerian for providing a patch.

Added:
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java   (with props)
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java   (with props)
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java   (with props)
    opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java   (with props)
Removed:
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
Modified:
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
    opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java?rev=1689330&r1=1689329&r2=1689330&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java Mon Jul  6 07:49:31 2015
@@ -142,7 +142,7 @@ public class Constants {
           parts = result.split("@");
           pos = POS.getPOSForKey(parts[0]);
           offset = Long.parseLong(parts[1]);
-          score = Double.parseDouble(parts[2]);
+          score = Double.parseDouble(parts[3]);
           try {
             Constants.print("score : " + score + " for : "
                 + Loader.getDictionary().getSynsetAt(pos, offset).getGloss());

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java?rev=1689330&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java Mon Jul  6 07:49:31 2015
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import java.util.ArrayList;
+
+import net.sf.extjwnl.JWNLException;
+import net.sf.extjwnl.data.PointerUtils;
+import net.sf.extjwnl.data.Synset;
+import net.sf.extjwnl.data.Word;
+import net.sf.extjwnl.data.list.PointerTargetNode;
+import net.sf.extjwnl.data.list.PointerTargetNodeList;
+
+/**
+ * Convenience class to access some features.
+ */
+public class SynNode {
+
+  public Synset parent;
+  public Synset synset;
+
+  protected ArrayList<WordPOS> senseRelevantWords;
+
+  public ArrayList<Synset> hypernyms = new ArrayList<Synset>();
+  public ArrayList<Synset> hyponyms = new ArrayList<Synset>();
+  public ArrayList<Synset> meronyms = new ArrayList<Synset>();
+  public ArrayList<Synset> holonyms = new ArrayList<Synset>();
+
+  public ArrayList<WordPOS> synonyms = new ArrayList<WordPOS>();
+
+  public SynNode(Synset parent, Synset synSet,
+      ArrayList<WordPOS> senseRelevantWords) {
+    this.parent = parent;
+    this.synset = synSet;
+    this.senseRelevantWords = senseRelevantWords;
+  }
+
+  public SynNode(Synset synSet, ArrayList<WordPOS> senseRelevantWords) {
+    this.synset = synSet;
+    this.senseRelevantWords = senseRelevantWords;
+  }
+
+  public ArrayList<WordPOS> getSenseRelevantWords() {
+    return senseRelevantWords;
+  }
+
+  public void setSenseRelevantWords(ArrayList<WordPOS> senseRelevantWords) {
+    this.senseRelevantWords = senseRelevantWords;
+  }
+
+  public void setHypernyms() {
+    // PointerUtils pointerUtils = PointerUtils.get();
+    PointerTargetNodeList phypernyms = new PointerTargetNodeList();
+    try {
+      phypernyms = PointerUtils.getDirectHypernyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  hypernyms");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < phypernyms.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) phypernyms.get(i);
+      this.hypernyms.add(ptn.getSynset());
+    }
+
+  }
+
+  public void setMeronyms() {
+    // PointerUtils pointerUtils = PointerUtils.getInstance();
+    PointerTargetNodeList pmeronyms = new PointerTargetNodeList();
+    try {
+      pmeronyms = PointerUtils.getMeronyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  meronyms");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < pmeronyms.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) pmeronyms.get(i);
+      this.meronyms.add(ptn.getSynset());
+    }
+  }
+
+  public void setHolonyms() {
+    // PointerUtils pointerUtils = PointerUtils.getInstance();
+    PointerTargetNodeList pholonyms = new PointerTargetNodeList();
+    try {
+      pholonyms = PointerUtils.getHolonyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  holonyms");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < pholonyms.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) pholonyms.get(i);
+      this.holonyms.add(ptn.getSynset());
+    }
+
+  }
+
+  public void setHyponyms() {
+    // PointerUtils pointerUtils = PointerUtils.getInstance();
+    PointerTargetNodeList phyponyms = new PointerTargetNodeList();
+    try {
+      phyponyms = PointerUtils.getDirectHyponyms(this.synset);
+    } catch (JWNLException e) {
+      e.printStackTrace();
+    } catch (NullPointerException e) {
+      System.err.println("Error finding the  hyponyms");
+      e.printStackTrace();
+    }
+
+    for (int i = 0; i < phyponyms.size(); i++) {
+      PointerTargetNode ptn = (PointerTargetNode) phyponyms.get(i);
+      this.hyponyms.add(ptn.getSynset());
+    }
+  }
+
+  public void setSynonyms() {
+    for (Word word : synset.getWords())
+      synonyms.add(new WordPOS(word.toString(), word.getPOS()));
+  }
+
+  public ArrayList<Synset> getHypernyms() {
+    return hypernyms;
+  }
+
+  public ArrayList<Synset> getHyponyms() {
+    return hyponyms;
+  }
+
+  public ArrayList<Synset> getMeronyms() {
+    return meronyms;
+  }
+
+  public ArrayList<Synset> getHolonyms() {
+    return holonyms;
+  }
+
+  public ArrayList<WordPOS> getSynonyms() {
+    return synonyms;
+  }
+  
+  public String getGloss() {
+    return this.synset.getGloss().toString();
+  }
+  
+  public long getSynsetID() {
+    return this.synset.getOffset();
+  }
+}

Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/SynNode.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java?rev=1689330&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java Mon Jul  6 07:49:31 2015
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import opennlp.tools.util.eval.EvaluationMonitor;
+
+public interface WSDEvaluationMonitor extends
+    EvaluationMonitor<WordToDisambiguate> {
+
+}
+

Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluationMonitor.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java?rev=1689330&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java Mon Jul  6 07:49:31 2015
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import net.sf.extjwnl.data.POS;
+import opennlp.tools.disambiguator.lesk.Lesk;
+import opennlp.tools.util.eval.Evaluator;
+import opennlp.tools.util.eval.Mean;
+
+/**
+ * The {@link WSDEvaluator} measures the performance of the given
+ * {@link WSDisambiguator} with the provided reference
+ * {@link WordToDisambiguate}.
+ *
+ * @see Evaluator
+ * @see WSDisambiguator
+ * @see WordToDisambiguate
+ */
+public class WSDEvaluator extends Evaluator<WordToDisambiguate> {
+
+  /** Running mean of per-sample hits (1) and misses (0). */
+  private final Mean accuracy = new Mean();
+
+  /** The {@link WSDisambiguator} used to create the disambiguated senses. */
+  private final WSDisambiguator disambiguator;
+
+  /**
+   * Initializes the current instance with the given {@link WSDisambiguator}.
+   *
+   * @param disambiguator
+   *          the {@link WSDisambiguator} to evaluate.
+   * @param listeners
+   *          evaluation sample listeners
+   */
+  public WSDEvaluator(WSDisambiguator disambiguator,
+      WSDEvaluationMonitor... listeners) {
+    super(listeners);
+    this.disambiguator = disambiguator;
+  }
+
+  /**
+   * Disambiguates the target word of the reference sample and records whether
+   * the best predicted sense key matches the reference: any reference sense
+   * counts when coarse senses are enabled, otherwise only the first one. A
+   * missing or malformed prediction, or a reference without senses, is recorded
+   * as a miss instead of aborting the evaluation with an exception.
+   *
+   * @param reference
+   *          the annotated sample to evaluate against
+   * @return a sample holding the sentence and word index of the reference
+   */
+  // @Override
+  protected WordToDisambiguate processSample(WordToDisambiguate reference) {
+    String[] referenceSenses = reference.getSenseIDs().toArray(
+        new String[reference.getSenseIDs().size()]);
+
+    String senseKey = bestSenseKey(disambiguator.disambiguate(
+        reference.getSentence(), reference.getWordIndex()));
+
+    // coarse senses: any reference sense may match; fine senses: only the first
+    int candidates = disambiguator.getParams().isCoarseSense()
+        ? referenceSenses.length : Math.min(1, referenceSenses.length);
+
+    boolean correct = false;
+    for (int i = 0; i < candidates && !correct; i++) {
+      correct = senseKey != null && senseKey.equals(referenceSenses[i]);
+    }
+    accuracy.add(correct ? 1 : 0);
+
+    return new WordToDisambiguate(reference.getSentence(),
+        reference.getWordIndex());
+  }
+
+  /**
+   * Extracts the sense key from the best prediction, which is expected to be
+   * formatted as {@code POS@offset@senseKey@score}.
+   *
+   * @return the sense key, or {@code null} if it cannot be extracted
+   */
+  private static String bestSenseKey(String[] predictions) {
+    if (predictions == null || predictions.length == 0
+        || predictions[0] == null) {
+      return null;
+    }
+    String[] parts = predictions[0].split("@");
+    return parts.length > 2 ? parts[2] : null;
+  }
+
+  /** @return the mean accuracy over all evaluated words */
+  public double getAccuracy() {
+    return accuracy.mean();
+  }
+
+  /** @return the number of words evaluated so far */
+  public long getWordCount() {
+    return accuracy.count();
+  }
+
+}
+

Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDEvaluator.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java?rev=1689330&r1=1689329&r2=1689330&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java Mon Jul  6 07:49:31 2015
@@ -65,6 +65,12 @@ public class WordPOS {
     IndexWord indexWord;
     try {
       indexWord = Loader.getDictionary().lookupIndexWord(pos, word);
+      if (indexWord == null) {
+        Constants
+            .print("NULL synset probably a POS tagger mistake ! :: [POS] : "
+                + pos.getLabel() + " [word] : " + word);
+        return null;
+      }
       List<Synset> synsets = indexWord.getSenses();
       return (new ArrayList<Synset>(synsets));
     } catch (JWNLException e) {
@@ -79,11 +85,6 @@ public class WordPOS {
     List originalList = this.getStems();
     List listToCompare = wordToCompare.getStems();
 
-    // Constants.print("+++++++++++++++++++++  ::: "+ this.getWord());
-    // Constants.print("+++++++++++++++++++++  ::: "+ wordToCompare.getWord());
-    // Constants.print("the first list is \n"+originalList.toString());
-    // Constants.print("the second list is \n"+listToCompare.toString());
-
     if (originalList == null || listToCompare == null) { // any of the two
                                                          // requested words do
                                                          // not exist
@@ -96,7 +97,7 @@ public class WordPOS {
 
   // uses Lemma to check if two words are equivalent
   public boolean isLemmaEquivalent(WordPOS wordToCompare) {
-    // TODO use lemmatizer to compare with lemmas
+    // TODO try using lemmatizer to compare with lemmas
 
     ArrayList<String> lemmas_word = new ArrayList();
     ArrayList<String> lemmas_wordToCompare = new ArrayList();

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java?rev=1689330&r1=1689329&r2=1689330&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java Mon Jul  6 07:49:31 2015
@@ -24,11 +24,11 @@ import opennlp.tools.disambiguator.lesk.
 public class WordSense implements Comparable {
 
   protected WTDLesk WTDLesk;
-  protected Node node;
+  protected SynNode node;
   protected int id;
   protected double score;
 
-  public WordSense(WTDLesk WTDLesk, Node node) {
+  public WordSense(WTDLesk WTDLesk, SynNode node) {
     super();
     this.WTDLesk = WTDLesk;
     this.node = node;
@@ -46,11 +46,11 @@ public class WordSense implements Compar
     this.WTDLesk = WTDLesk;
   }
 
-  public Node getNode() {
+  public SynNode getNode() {
     return node;
   }
 
-  public void setNode(Node node) {
+  public void setNode(SynNode node) {
     this.node = node;
   }
 
@@ -74,8 +74,8 @@ public class WordSense implements Compar
     return (this.score - ((WordSense) o).score) < 0 ? 1 : -1;
   }
 
-  public String getSense() {
-    return node.getSense();
+  public String getGloss() {
+    return node.getGloss();
   }
 
 }

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java?rev=1689330&r1=1689329&r2=1689330&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java Mon Jul  6 07:49:31 2015
@@ -37,10 +37,6 @@ public class WordToDisambiguate {
 
   protected ArrayList<String> senseIDs;
 
-  /**
-   * Constructor
-   */
-
   public WordToDisambiguate(String[] sentence, int wordIndex, int sense)
       throws IllegalArgumentException {
     super();
@@ -70,11 +66,7 @@ public class WordToDisambiguate {
     this.sense = -1;
 
   }
-
-  /**
-   * Getters and Setters
-   */
-
+   
   // Sentence
   public String[] getSentence() {
     return sentence;
@@ -118,7 +110,6 @@ public class WordToDisambiguate {
     }
 
     return ref;
-
   }
 
   public String getWord() {
@@ -157,3 +148,4 @@ public class WordToDisambiguate {
     return (wordIndex + "\t" + getWord() + "\n" + sentence);
   }
 }
+

Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java?rev=1689330&r1=1689329&r2=1689330&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java Mon Jul  6 07:49:31 2015
@@ -16,30 +16,32 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-
 package opennlp.tools.disambiguator.lesk;
 
 import java.security.InvalidParameterException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.List;
 
 import opennlp.tools.disambiguator.Constants;
 import opennlp.tools.disambiguator.Loader;
-import opennlp.tools.disambiguator.Node;
+import opennlp.tools.disambiguator.SynNode;
 import opennlp.tools.disambiguator.PreProcessor;
 import opennlp.tools.disambiguator.WSDParameters;
 import opennlp.tools.disambiguator.WSDisambiguator;
 import opennlp.tools.disambiguator.WordPOS;
 import opennlp.tools.disambiguator.WordSense;
 import opennlp.tools.util.Span;
+import net.sf.extjwnl.JWNLException;
 import net.sf.extjwnl.data.Synset;
+import net.sf.extjwnl.data.Word;
 
 /**
- * Implementation of the <b>Overlap Of Senses</b> approach originally proposed by Lesk.
- * The main idea is to check for word overlaps in the sense definitions of the surrounding context. 
- * An overlap is when two words have similar stems.
- * The more overlaps a word has the higher its score.
- * Different variations of the approach are included in this class.
+ * Implementation of the <b>Overlap Of Senses</b> approach originally proposed
+ * by Lesk. The main idea is to check for word overlaps in the sense definitions
+ * of the surrounding context. An overlap is when two words have similar stems.
+ * The more overlaps a word has the higher its score. Different variations of
+ * the approach are included in this class.
  * 
  */
 public class Lesk implements WSDisambiguator {
@@ -48,27 +50,29 @@ public class Lesk implements WSDisambigu
    * The lesk specific parameters
    */
   protected LeskParameters params;
-  
+
   public Lesk() {
     this(null);
   }
 
   /**
    * Initializes the loader object and sets the input parameters
-   * @param Input Parameters
+   * 
+   * @param params
+   *          the input parameters
    * @throws InvalidParameterException
    */
   public Lesk(LeskParameters params) throws InvalidParameterException {
     Loader loader = new Loader();
     this.setParams(params);
   }
-  
 
   /**
-   * If the parameters are null set the default ones, else only set them if they valid.
-   * Invalid parameters will return a exception
+   * If the parameters are null, set the default ones; else only set them if
+   * they are valid. Invalid parameters cause an exception to be thrown.
    * 
-   * @param Input parameters
+   * @param Input
+   *          parameters
    * @throws InvalidParameterException
    */
   @Override
@@ -84,7 +88,7 @@ public class Lesk implements WSDisambigu
     }
   }
 
-  /** 
+  /**
    * @return the parameter settings
    */
   public LeskParameters getParams() {
@@ -94,7 +98,8 @@ public class Lesk implements WSDisambigu
   /**
    * The basic Lesk method where the entire context is considered for overlaps
    * 
-   * @param The word to disambiguate 
+   * @param wtd
+   *          the word to disambiguate
    * @return The array of WordSenses with their scores
    */
   public ArrayList<WordSense> basic(WTDLesk wtd) {
@@ -103,10 +108,10 @@ public class Lesk implements WSDisambigu
     WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
 
     ArrayList<Synset> synsets = word.getSynsets();
-    ArrayList<Node> nodes = new ArrayList<Node>();
+    ArrayList<SynNode> nodes = new ArrayList<SynNode>();
 
     for (Synset synset : synsets) {
-      Node node = new Node(synset, relvWords);
+      SynNode node = new SynNode(synset, relvWords);
       nodes.add(node);
     }
 
@@ -132,7 +137,9 @@ public class Lesk implements WSDisambigu
 
   /**
    * The basic Lesk method but applied to a default context windows
-   * @param The word to disambiguate 
+   * 
+   * @param wtd
+   *          the word to disambiguate
    * @return The array of WordSenses with their scores
    */
   public ArrayList<WordSense> basicContextual(WTDLesk wtd) {
@@ -141,7 +148,9 @@ public class Lesk implements WSDisambigu
 
   /**
    * The basic Lesk method but applied to a custom context windows
-   * @param The word to disambiguate 
+   * 
+   * @param wtd
+   *          the word to disambiguate
    * @param windowSize
    * @return The array of WordSenses with their scores
    */
@@ -150,9 +159,12 @@ public class Lesk implements WSDisambigu
   }
 
   /**
-   * The basic Lesk method but applied to a context windows set by custom backward and forward window lengths
-   * @param wtd the word to disambiguate 
-   * @param windowBackward 
+   * The basic Lesk method but applied to a context windows set by custom
+   * backward and forward window lengths
+   * 
+   * @param wtd
+   *          the word to disambiguate
+   * @param windowBackward
    * @return the array of WordSenses with their scores
    */
   public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowBackward,
@@ -163,10 +175,10 @@ public class Lesk implements WSDisambigu
     WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
 
     ArrayList<Synset> synsets = word.getSynsets();
-    ArrayList<Node> nodes = new ArrayList<Node>();
+    ArrayList<SynNode> nodes = new ArrayList<SynNode>();
 
     for (Synset synset : synsets) {
-      Node node = new Node(synset, relvWords);
+      SynNode node = new SynNode(synset, relvWords);
       nodes.add(node);
     }
 
@@ -196,11 +208,16 @@ public class Lesk implements WSDisambigu
   }
 
   /**
-   * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps across the entire context
-   * The scoring function uses linear weights.
-   * @param wtd the word to disambiguate
-   * @param depth how deep to go into each feature tree
-   * @param depthScoreWeight the weighing per depth level
+   * An extended version of the Lesk approach that takes into consideration
+   * semantically related feature overlaps across the entire context. The scoring
+   * function uses linear weights.
+   * 
+   * @param wtd
+   *          the word to disambiguate
+   * @param depth
+   *          how deep to go into each feature tree
+   * @param depthScoreWeight
+   *          the weighing per depth level
    * @param includeSynonyms
    * @param includeHypernyms
    * @param includeHyponyms
@@ -219,11 +236,16 @@ public class Lesk implements WSDisambigu
   }
 
   /**
-   * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in a default context window
-   * The scoring function uses linear weights.
-   * @param wtd the word to disambiguate 
-   * @param depth how deep to go into each feature tree
-   * @param depthScoreWeight the weighing per depth level
+   * An extended version of the Lesk approach that takes into consideration
+   * semantically related feature overlaps in a default context window. The
+   * scoring function uses linear weights.
+   * 
+   * @param wtd
+   *          the word to disambiguate
+   * @param depth
+   *          how deep to go into each feature tree
+   * @param depthScoreWeight
+   *          the weighing per depth level
    * @param includeSynonyms
    * @param includeHypernyms
    * @param includeHyponyms
@@ -243,12 +265,18 @@ public class Lesk implements WSDisambigu
   }
 
   /**
-   * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in a custom context window
-   * The scoring function uses linear weights.
-   * @param wtd the word to disambiguate 
-   * @param windowSize the custom context window size
-   * @param depth how deep to go into each feature tree
-   * @param depthScoreWeight the weighing per depth level
+   * An extended version of the Lesk approach that takes into consideration
+   * semantically related feature overlaps in a custom context window. The
+   * scoring function uses linear weights.
+   * 
+   * @param wtd
+   *          the word to disambiguate
+   * @param windowSize
+   *          the custom context window size
+   * @param depth
+   *          how deep to go into each feature tree
+   * @param depthScoreWeight
+   *          the weighing per depth level
    * @param includeSynonyms
    * @param includeHypernyms
    * @param includeHyponyms
@@ -266,15 +294,21 @@ public class Lesk implements WSDisambigu
         includeMeronyms, includeHolonyms);
   }
 
-
   /**
-   * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in a custom context window
-   * The scoring function uses linear weights.
-   * @param wtd the word to disambiguate 
-   * @param windowBackward the custom context backward window size
-   * @param windowForward the custom context forward window size
-   * @param depth how deep to go into each feature tree
-   * @param depthScoreWeight the weighing per depth level
+   * An extended version of the Lesk approach that takes into consideration
+   * semantically related feature overlaps in a custom context window. The
+   * scoring function uses linear weights.
+   * 
+   * @param wtd
+   *          the word to disambiguate
+   * @param windowBackward
+   *          the custom context backward window size
+   * @param windowForward
+   *          the custom context forward window size
+   * @param depth
+   *          how deep to go into each feature tree
+   * @param depthScoreWeight
+   *          the weighing per depth level
    * @param includeSynonyms
    * @param includeHypernyms
    * @param includeHyponyms
@@ -293,10 +327,10 @@ public class Lesk implements WSDisambigu
     WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
 
     ArrayList<Synset> synsets = word.getSynsets();
-    ArrayList<Node> nodes = new ArrayList<Node>();
+    ArrayList<SynNode> nodes = new ArrayList<SynNode>();
 
     for (Synset synset : synsets) {
-      Node node = new Node(synset, relvWords);
+      SynNode node = new SynNode(synset, relvWords);
       nodes.add(node);
     }
 
@@ -341,12 +375,15 @@ public class Lesk implements WSDisambigu
 
   }
 
-  
   /**
-   * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in all the context.
-   * The scoring function uses exponential weights.
-   * @param wtd the word to disambiguate 
-   * @param depth how deep to go into each feature tree
+   * An extended version of the Lesk approach that takes into consideration
+   * semantically related feature overlaps in all the context. The scoring
+   * function uses exponential weights.
+   * 
+   * @param wtd
+   *          the word to disambiguate
+   * @param depth
+   *          how deep to go into each feature tree
    * @param intersectionExponent
    * @param depthExponent
    * @param includeSynonyms
@@ -366,12 +403,16 @@ public class Lesk implements WSDisambigu
         includeMeronyms, includeHolonyms);
 
   }
-  
+
   /**
-   * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in a default window in the context.
+   * An extended version of the Lesk approach that takes into consideration
+   * semantically related feature overlaps in a default window in the context.
    * The scoring function uses exponential weights.
-   * @param wtd the word to disambiguate 
-   * @param depth how deep to go into each feature tree
+   * 
+   * @param wtd
+   *          the word to disambiguate
+   * @param depth
+   *          how deep to go into each feature tree
    * @param intersectionExponent
    * @param depthExponent
    * @param includeSynonyms
@@ -390,13 +431,17 @@ public class Lesk implements WSDisambigu
         depth, intersectionExponent, depthExponent, includeSynonyms,
         includeHypernyms, includeHyponyms, includeMeronyms, includeHolonyms);
   }
-  
+
   /**
-   * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in a custom window in the context.
+   * An extended version of the Lesk approach that takes into consideration
+   * semantically related feature overlaps in a custom window in the context.
    * The scoring function uses exponential weights.
-   * @param wtd the word to disambiguate 
-   * @param windowSize 
-   * @param depth how deep to go into each feature tree
+   * 
+   * @param wtd
+   *          the word to disambiguate
+   * @param windowSize
+   * @param depth
+   *          how deep to go into each feature tree
    * @param intersectionExponent
    * @param depthExponent
    * @param includeSynonyms
@@ -417,9 +462,12 @@ public class Lesk implements WSDisambigu
   }
 
   /**
-   * An extended version of the Lesk approach that takes into consideration semantically related feature overlaps in a custom window in the context.
+   * An extended version of the Lesk approach that takes into consideration
+   * semantically related feature overlaps in a custom window in the context.
    * The scoring function uses exponential weights.
-   * @param wtd the word to disambiguate 
+   * 
+   * @param wtd
+   *          the word to disambiguate
    * @param windowBackward
    * @param windowForward
    * @param depth
@@ -442,10 +490,10 @@ public class Lesk implements WSDisambigu
     WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
 
     ArrayList<Synset> synsets = word.getSynsets();
-    ArrayList<Node> nodes = new ArrayList<Node>();
+    ArrayList<SynNode> nodes = new ArrayList<SynNode>();
 
     for (Synset synset : synsets) {
-      Node node = new Node(synset, relvWords);
+      SynNode node = new SynNode(synset, relvWords);
       nodes.add(node);
     }
 
@@ -494,6 +542,7 @@ public class Lesk implements WSDisambigu
 
   /**
    * Recursively score the hypernym tree linearly
+   * 
    * @param wordSense
    * @param child
    * @param relvWords
@@ -512,7 +561,7 @@ public class Lesk implements WSDisambigu
     ArrayList<WordPOS> relvGlossWords = PreProcessor
         .getAllRelevantWords(tokenizedGloss);
 
-    Node childNode = new Node(child, relvGlossWords);
+    SynNode childNode = new SynNode(child, relvGlossWords);
 
     childNode.setHypernyms();
     wordSense.setScore(wordSense.getScore()
@@ -525,7 +574,8 @@ public class Lesk implements WSDisambigu
   }
 
   /**
-   * Recursively score the hypernym tree exponentially 
+   * Recursively score the hypernym tree exponentially
+   * 
    * @param wordSense
    * @param child
    * @param relvWords
@@ -545,7 +595,7 @@ public class Lesk implements WSDisambigu
     ArrayList<WordPOS> relvGlossWords = PreProcessor
         .getAllRelevantWords(tokenizedGloss);
 
-    Node childNode = new Node(child, relvGlossWords);
+    SynNode childNode = new SynNode(child, relvGlossWords);
 
     childNode.setHypernyms();
     wordSense.setScore(wordSense.getScore()
@@ -560,6 +610,7 @@ public class Lesk implements WSDisambigu
 
   /**
    * Recursively score the hyponym tree linearly
+   * 
    * @param wordSense
    * @param child
    * @param relvWords
@@ -578,7 +629,7 @@ public class Lesk implements WSDisambigu
     ArrayList<WordPOS> relvGlossWords = PreProcessor
         .getAllRelevantWords(tokenizedGloss);
 
-    Node childNode = new Node(child, relvGlossWords);
+    SynNode childNode = new SynNode(child, relvGlossWords);
 
     childNode.setHyponyms();
     wordSense.setScore(wordSense.getScore()
@@ -593,6 +644,7 @@ public class Lesk implements WSDisambigu
 
   /**
    * Recursively score the hyponym tree exponentially
+   * 
    * @param wordSense
    * @param child
    * @param relvWords
@@ -612,7 +664,7 @@ public class Lesk implements WSDisambigu
     ArrayList<WordPOS> relvGlossWords = PreProcessor
         .getAllRelevantWords(tokenizedGloss);
 
-    Node childNode = new Node(child, relvGlossWords);
+    SynNode childNode = new SynNode(child, relvGlossWords);
 
     childNode.setHyponyms();
     wordSense.setScore(wordSense.getScore()
@@ -627,6 +679,7 @@ public class Lesk implements WSDisambigu
 
   /**
    * Recursively score the meronym tree linearly
+   * 
    * @param wordSense
    * @param child
    * @param relvWords
@@ -645,7 +698,7 @@ public class Lesk implements WSDisambigu
     ArrayList<WordPOS> relvGlossWords = PreProcessor
         .getAllRelevantWords(tokenizedGloss);
 
-    Node childNode = new Node(child, relvGlossWords);
+    SynNode childNode = new SynNode(child, relvGlossWords);
 
     childNode.setMeronyms();
     wordSense.setScore(wordSense.getScore()
@@ -660,6 +713,7 @@ public class Lesk implements WSDisambigu
 
   /**
    * Recursively score the meronym tree exponentially
+   * 
    * @param wordSense
    * @param child
    * @param relvWords
@@ -679,7 +733,7 @@ public class Lesk implements WSDisambigu
     ArrayList<WordPOS> relvGlossWords = PreProcessor
         .getAllRelevantWords(tokenizedGloss);
 
-    Node childNode = new Node(child, relvGlossWords);
+    SynNode childNode = new SynNode(child, relvGlossWords);
 
     childNode.setMeronyms();
     wordSense.setScore(wordSense.getScore()
@@ -694,6 +748,7 @@ public class Lesk implements WSDisambigu
 
   /**
    * Recursively score the holonym tree linearly
+   * 
    * @param wordSense
    * @param child
    * @param relvWords
@@ -712,7 +767,7 @@ public class Lesk implements WSDisambigu
     ArrayList<WordPOS> relvGlossWords = PreProcessor
         .getAllRelevantWords(tokenizedGloss);
 
-    Node childNode = new Node(child, relvGlossWords);
+    SynNode childNode = new SynNode(child, relvGlossWords);
 
     childNode.setHolonyms();
     wordSense.setScore(wordSense.getScore()
@@ -727,6 +782,7 @@ public class Lesk implements WSDisambigu
 
   /**
    * Recursively score the holonym tree exponentially
+   * 
    * @param wordSense
    * @param child
    * @param relvWords
@@ -746,7 +802,7 @@ public class Lesk implements WSDisambigu
     ArrayList<WordPOS> relvGlossWords = PreProcessor
         .getAllRelevantWords(tokenizedGloss);
 
-    Node childNode = new Node(child, relvGlossWords);
+    SynNode childNode = new SynNode(child, relvGlossWords);
 
     childNode.setHolonyms();
     wordSense.setScore(wordSense.getScore()
@@ -761,6 +817,7 @@ public class Lesk implements WSDisambigu
 
   /**
    * Checks if the feature should be counted in the score
+   * 
    * @param featureSynsets
    * @param relevantWords
    * @return count of features to consider
@@ -769,10 +826,10 @@ public class Lesk implements WSDisambigu
       ArrayList<WordPOS> relevantWords) {
     int count = 0;
     for (Synset synset : featureSynsets) {
-      Node subNode = new Node(synset, relevantWords);
+      SynNode subNode = new SynNode(synset, relevantWords);
 
       String[] tokenizedSense = Loader.getTokenizer().tokenize(
-          subNode.getSense());
+          subNode.getGloss());
       ArrayList<WordPOS> relvSenseWords = PreProcessor
           .getAllRelevantWords(tokenizedSense);
 
@@ -789,6 +846,7 @@ public class Lesk implements WSDisambigu
 
   /**
    * Checks if the synonyms should be counted in the score
+   * 
    * @param synonyms
    * @param relevantWords
    * @return count of synonyms to consider
@@ -809,15 +867,16 @@ public class Lesk implements WSDisambigu
 
   /**
    * Gets the senses of the nodes
+   * 
    * @param nodes
    * @return senses from the nodes
    */
-  public ArrayList<WordSense> updateSenses(ArrayList<Node> nodes) {
+  public ArrayList<WordSense> updateSenses(ArrayList<SynNode> nodes) {
     ArrayList<WordSense> scoredSenses = new ArrayList<WordSense>();
 
     for (int i = 0; i < nodes.size(); i++) {
       ArrayList<WordPOS> sensesComponents = PreProcessor
-          .getAllRelevantWords(PreProcessor.tokenize(nodes.get(i).getSense()));
+          .getAllRelevantWords(PreProcessor.tokenize(nodes.get(i).getGloss()));
       WordSense wordSense = new WordSense();
       nodes.get(i).setSenseRelevantWords(sensesComponents);
       wordSense.setNode(nodes.get(i));
@@ -827,25 +886,26 @@ public class Lesk implements WSDisambigu
     return scoredSenses;
 
   }
-  
+
   /**
    * Disambiguates an ambiguous word in its context
    * 
    * @param tokenizedContext
    * @param ambiguousTokenIndex
-   * @return array of sense indexes from WordNet ordered by their score.
-   * The result format is <b>POS</b>@<b>SenseID</b>@<b>Sense Score</b> 
-   * If the input token is non relevant a null is returned.
+   * @return array of sense indexes from WordNet ordered by their score. The
+   *         result format is <b>POS</b>@<b>SynsetID</b>@<b>SenseKey</b>@<b>Sense
+   *         Score</b>. If the input token is not relevant, null is returned.
    */
   @Override
-  public String[] disambiguate(String[] tokenizedContext, int ambiguousTokenIndex) {
-    
+  public String[] disambiguate(String[] tokenizedContext,
+      int ambiguousTokenIndex) {
+
     WTDLesk wtd = new WTDLesk(tokenizedContext, ambiguousTokenIndex);
     // if the word is not relevant return null
-    if (!Constants.isRelevant(wtd.getPosTag())){
-      return null ;
+    if (!Constants.isRelevant(wtd.getPosTag())) {
+      return null;
     }
-    
+
     ArrayList<WordSense> wsenses = null;
 
     switch (this.params.leskType) {
@@ -921,32 +981,46 @@ public class Lesk implements WSDisambigu
         LeskParameters.DFLT_DEXP, true, true, true, true, true);
     Collections.sort(wsenses);
 
-    // TODO modify to longs but for now we have strings in the data for coarsing
+    List<Word> synsetWords;
     String[] senses = new String[wsenses.size()];
+    String senseKey = "?";
     for (int i = 0; i < wsenses.size(); i++) {
+      synsetWords = wsenses.get(i).getNode().synset.getWords();
+      for (Word synWord : synsetWords) {
+        if (synWord.getLemma().equals(wtd.getWord())) {
+          try {
+            senseKey = synWord.getSenseKey();
+          } catch (JWNLException e) {
+            e.printStackTrace();
+          }
+          break;
+        }
+      }
       senses[i] = Constants.getPOS(wsenses.get(i).getWTDLesk().getPosTag())
           .getKey()
           + "@"
-          + Long.toString(wsenses.get(i).getNode().getSenseID())
+          + Long.toString(wsenses.get(i).getNode().getSynsetID())
           + "@"
-          + wsenses.get(i).getScore();
+          + senseKey + "@" + wsenses.get(i).getScore();
+
+      Collections.sort(wsenses);
     }
     return senses;
   }
 
-  
   /**
-   * Disambiguates an ambiguous word in its context
-   * The user can set a span of inputWords from the tokenized input
-   *  
+   * Disambiguates an ambiguous word in its context. The user can set a span of
+   * inputWords from the tokenized input.
+   * 
    * @param inputText
    * @param inputWordSpans
-   * @return array of array of sense indexes from WordNet ordered by their score.
-   * The result format is <b>POS</b>@<b>SenseID</b>@<b>Sense Score</b> 
-   * If the input token is non relevant a null is returned.
+   * @return array of arrays of sense indexes from WordNet ordered by their
+   *         score. The result format is <b>POS</b>@<b>SenseID</b>@<b>Sense
+   *         Score</b>. If the input token is not relevant, null is returned.
    */
-  @Override 
-  public String[][] disambiguate(String[] tokenizedContext, Span[] ambiguousTokenSpans) {
+  @Override
+  public String[][] disambiguate(String[] tokenizedContext,
+      Span[] ambiguousTokenSpans) {
     // TODO need to work on spans
     return null;
   }

Added: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java?rev=1689330&view=auto
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java (added)
+++ opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java Mon Jul  6 07:49:31 2015
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import opennlp.tools.disambiguator.ims.WTDIMS;
+import opennlp.tools.disambiguator.lesk.Lesk;
+import opennlp.tools.disambiguator.lesk.LeskParameters;
+
+import org.junit.Test;
+
+public class LeskEvaluatorTest {
+
+  static DataExtractor dExtractor = new DataExtractor();
+
+  @Test
+  public static void main(String[] args) {
+    Constants.print("Evaluation Started");
+
+    String testDataLoc = "src\\test\\resources\\data\\";
+    String helpersLoc = "src\\test\\resources\\helpers\\";
+
+    File[] listOfFiles;
+    File testFolder = new File(testDataLoc);
+
+    // these are needed for mapping the sense IDs from the current data
+    String dict = helpersLoc + "EnglishLS.dictionary.xml";
+    String map = helpersLoc + "EnglishLS.sensemap";
+
+    Lesk lesk = new Lesk();
+    LeskParameters leskParams = new LeskParameters();
+    leskParams.setLeskType(LeskParameters.LESK_TYPE.LESK_BASIC);
+    lesk.setParams(leskParams);
+
+    if (testFolder.isDirectory()) {
+      listOfFiles = testFolder.listFiles();
+      for (File file : listOfFiles) {
+        WSDEvaluator evaluator = new WSDEvaluator(lesk);
+        if (file.isFile()) {
+          // don't take verbs because they are not from WordNet
+          if (!file.getName().split("\\.")[1].equals("v")) {
+            HashMap<String, ArrayList<DictionaryInstance>> senses = dExtractor
+                .extractWordSenses(dict, map, file.getName());
+            ArrayList<WTDIMS> instances = getTestData(file.getAbsolutePath(),
+                senses);
+
+            if (instances != null) {
+              Constants.print("------------------" + file.getName()
+                  + "------------------");
+              Constants.print("there are " + instances.size() + " instances");
+              for (WordToDisambiguate instance : instances) {
+                // Constants.print("sense IDs : " + instance.senseIDs);
+                evaluator.evaluateSample(instance);
+              }
+              Constants.print("the accuracy " + evaluator.getAccuracy() * 100
+                  + "%");
+            } else {
+              Constants.print("null instances");
+            }
+          }
+        }
+      }
+    }
+  }
+
+  protected static ArrayList<WTDIMS> getTestData(String testFile,
+      HashMap<String, ArrayList<DictionaryInstance>> senses) {
+    /**
+     * word tag has to be in the format "word.POS" (e.g., "activate.v",
+     * "smart.a", etc.)
+     */
+    ArrayList<WTDIMS> trainingData = dExtractor.extractWSDInstances(testFile);
+
+    // HashMap<Integer, WTDIMS> trainingData =
+    // dExtractor.extractWSDInstances(wordTrainingxmlFile);
+    for (WTDIMS data : trainingData) {
+      for (String senseId : data.getSenseIDs()) {
+        for (String dictKey : senses.keySet()) {
+          for (DictionaryInstance instance : senses.get(dictKey)) {
+            if (senseId.equals(instance.getId())) {
+              data.setSense(Integer.parseInt(dictKey.split("_")[1]));
+              break;
+            }
+          }
+        }
+      }
+    }
+
+    return trainingData;
+  }
+
+}

Propchange: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/LeskEvaluatorTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain