You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2015/06/25 11:20:30 UTC
svn commit: r1687455 [2/2] - in /opennlp/sandbox/opennlp-wsd/src:
main/java/opennlp/tools/disambiguator/
main/java/opennlp/tools/disambiguator/ims/
main/java/opennlp/tools/disambiguator/lesk/
test/java/opennlp/tools/disambiguator/
Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java Thu Jun 25 09:20:30 2015
@@ -3,10 +3,7 @@ package opennlp.tools.disambiguator.lesk
import java.security.InvalidParameterException;
import java.util.ArrayList;
-
-
import java.util.Collections;
-import java.util.Map;
import opennlp.tools.disambiguator.Constants;
import opennlp.tools.disambiguator.Loader;
@@ -18,668 +15,637 @@ import opennlp.tools.disambiguator.WordS
import opennlp.tools.util.Span;
import net.sf.extjwnl.data.Synset;
-
/**
* Class for the Lesk algorithm and variants.
*/
-public class Lesk implements WSDisambiguator{
+public class Lesk implements WSDisambiguator {
- protected LeskParameters params;
+ protected LeskParameters params;
- public Loader loader;
+ public Loader loader;
- public Lesk(){
- this(null);
- }
-
- public Lesk(LeskParameters params) throws InvalidParameterException{
- loader = new Loader();
- this.setParams(params);
- }
-
- public void setParams(LeskParameters params) throws InvalidParameterException{
- if(params==null){
- this.params = new LeskParameters();
- }
- else{
- if (params.isValid()){
- this.params = params;
- }else{
- throw new InvalidParameterException("wrong params");
- }
- }
- }
-
- public ArrayList<WordSense> basic(WTDLesk wtd) {
-
- ArrayList<WordPOS> relvWords = PreProcessor.getAllRelevantWords(wtd);
- WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
-
- ArrayList<Synset> synsets = word.getSynsets();
- ArrayList<Node> nodes = new ArrayList<Node>();
-
- for (Synset synset : synsets) {
- Node node = new Node(synset, relvWords);
- nodes.add(node);
- }
-
- ArrayList<WordSense> scoredSenses = updateSenses(nodes);
-
- for (WordSense wordSense : scoredSenses) {
- wordSense.setWTDLesk(wtd);
- int count = 0;
- for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
- ArrayList stems = (ArrayList)PreProcessor.Stem(senseWordPOS);
- for (WordPOS sentenceWordPOS : relvWords) {
- // TODO change to lemma check
- if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
- count = count + 1;
- }
- }
- }
- wordSense.setScore(count);
- }
-
- return scoredSenses;
- }
-
- public ArrayList<WordSense> basicContextual(WTDLesk wtd) {
- return this.basicContextual(wtd,LeskParameters.DFLT_WIN_SIZE);
- }
-
- public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowSize) {
- return this.basicContextual(wtd, windowSize,windowSize);
- }
-
- public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowBackward, int windowForward) {
-
- ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd, windowBackward, windowForward);
- WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
-
- ArrayList<Synset> synsets = word.getSynsets();
- ArrayList<Node> nodes = new ArrayList<Node>();
-
-
- for (Synset synset : synsets) {
- Node node = new Node(synset, relvWords);
- nodes.add(node);
- }
-
- ArrayList<WordSense> scoredSenses = updateSenses(nodes);
-
-
- for (WordSense wordSense : scoredSenses) {
- wordSense.setWTDLesk(wtd);
-
- int count = 0;
- for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
-
- for (WordPOS sentenceWordPOS : relvWords) {
- // TODO change to lemma check
- if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
- count = count + 1;
- }
- }
-
- }
- wordSense.setScore(count);
-
- }
-
- Collections.sort(scoredSenses);
-
- return scoredSenses;
- }
-
- public ArrayList<WordSense> extended(WTDLesk wtd,
- int depth, double depthScoreWeight, boolean includeSynonyms,
- boolean includeHypernyms, boolean includeHyponyms,
- boolean includeMeronyms, boolean includeHolonyms) {
-
- return extendedContextual(wtd, 0, depth,
- depthScoreWeight, includeSynonyms, includeHypernyms,
- includeHyponyms, includeMeronyms, includeHolonyms);
-
- }
-
- public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
- int depth, double depthScoreWeight,
- boolean includeSynonyms, boolean includeHypernyms,
- boolean includeHyponyms, boolean includeMeronyms,
- boolean includeHolonyms){
-
- return extendedContextual(wtd, LeskParameters.DFLT_WIN_SIZE,
- depth, depthScoreWeight, includeSynonyms, includeHypernyms,
- includeHyponyms, includeMeronyms, includeHolonyms);
-
- }
-
- public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
- int windowSize, int depth, double depthScoreWeight,
- boolean includeSynonyms, boolean includeHypernyms,
- boolean includeHyponyms, boolean includeMeronyms,
- boolean includeHolonyms) {
-
- return extendedContextual(wtd, windowSize, windowSize,
- depth, depthScoreWeight, includeSynonyms, includeHypernyms,
- includeHyponyms, includeMeronyms, includeHolonyms);
- }
-
- public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
- int windowBackward, int windowForward, int depth,
- double depthScoreWeight, boolean includeSynonyms,
- boolean includeHypernyms, boolean includeHyponyms,
- boolean includeMeronyms, boolean includeHolonyms) {
-
- ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd,windowBackward,windowForward);
- WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
-
- ArrayList<Synset> synsets = word.getSynsets();
- ArrayList<Node> nodes = new ArrayList<Node>();
-
- for (Synset synset : synsets) {
- Node node = new Node(synset, relvWords);
- nodes.add(node);
- }
-
- ArrayList<WordSense> scoredSenses = basicContextual(wtd,windowBackward, windowForward);
-
- for (WordSense wordSense : scoredSenses) {
-
- if (includeSynonyms) {
- wordSense.setScore(wordSense.getScore()
- + depthScoreWeight
- * assessSynonyms(wordSense.getNode().getSynonyms(),relvWords));
- }
-
- if (includeHypernyms) {
- fathomHypernyms(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, depthScoreWeight);
- }
-
- if (includeHyponyms) {
-
- fathomHyponyms(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, depthScoreWeight);
- }
-
- if (includeMeronyms) {
-
- fathomMeronyms(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, depthScoreWeight);
-
- }
-
- if (includeHolonyms) {
-
- fathomHolonyms(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, depthScoreWeight);
-
- }
-
- }
-
- return scoredSenses;
-
- }
-
- public ArrayList<WordSense> extendedExponential(WTDLesk wtd,
- int depth,
- double intersectionExponent,double depthExponent, boolean includeSynonyms,
- boolean includeHypernyms, boolean includeHyponyms,
- boolean includeMeronyms, boolean includeHolonyms) {
-
- return extendedExponentialContextual(wtd, 0, depth,
- intersectionExponent, depthExponent, includeSynonyms,
- includeHypernyms, includeHyponyms,
- includeMeronyms, includeHolonyms);
-
- }
-
- public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
- int depth,
- double intersectionExponent,double depthExponent, boolean includeSynonyms,
- boolean includeHypernyms, boolean includeHyponyms,
- boolean includeMeronyms, boolean includeHolonyms) {
-
- return extendedExponentialContextual(wtd, LeskParameters.DFLT_WIN_SIZE,
- depth, intersectionExponent,depthExponent, includeSynonyms, includeHypernyms,
- includeHyponyms, includeMeronyms, includeHolonyms);
- }
-
- public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
- int windowSize, int depth,
- double intersectionExponent,double depthExponent, boolean includeSynonyms,
- boolean includeHypernyms, boolean includeHyponyms,
- boolean includeMeronyms, boolean includeHolonyms) {
-
- return extendedExponentialContextual(wtd, windowSize, windowSize,
- depth, intersectionExponent,depthExponent, includeSynonyms, includeHypernyms,
- includeHyponyms, includeMeronyms, includeHolonyms);
- }
-
- public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
- int windowBackward, int windowForward, int depth,
- double intersectionExponent,double depthExponent, boolean includeSynonyms,
- boolean includeHypernyms, boolean includeHyponyms,
- boolean includeMeronyms, boolean includeHolonyms) {
- ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd,windowBackward,windowForward);
- WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
-
- ArrayList<Synset> synsets = word.getSynsets();
- ArrayList<Node> nodes = new ArrayList<Node>();
-
- for (Synset synset : synsets) {
- Node node = new Node(synset, relvWords);
- nodes.add(node);
- }
-
- ArrayList<WordSense> scoredSenses = basicContextual(wtd, windowForward, windowBackward);
-
- for (WordSense wordSense : scoredSenses) {
-
-
- if (includeSynonyms) {
- wordSense.setScore(wordSense.getScore() + Math.pow(assessSynonyms(wordSense.getNode().getSynonyms(),
- relvWords),intersectionExponent));
- }
-
- if (includeHypernyms) {
- fathomHypernymsExponential(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth,intersectionExponent, depthExponent);
- }
-
- if (includeHyponyms) {
-
- fathomHyponymsExponential(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, intersectionExponent,depthExponent);
- }
-
- if (includeMeronyms) {
-
- fathomMeronymsExponential(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, intersectionExponent,depthExponent);
-
- }
-
- if (includeHolonyms) {
-
- fathomHolonymsExponential(wordSense, wordSense.getNode().synset,
- relvWords, depth, depth, intersectionExponent,depthExponent);
-
- }
-
- }
-
- return scoredSenses;
-
- }
-
- private void fathomHypernyms(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double depthScoreWeight) {
- if (depth == 0)
- return;
-
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
-
-
- Node childNode = new Node(child, relvGlossWords);
-
- childNode.setHypernyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getHypernyms(), relvWords));
- for (Synset hypernym : childNode.getHypernyms()) {
- fathomHypernyms(wordSense, hypernym, relvGlossWords, depth - 1, maxDepth,
- depthScoreWeight);
- }
- }
-
- private void fathomHypernymsExponential(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double intersectionExponent, double depthScoreExponent) {
- if (depth == 0)
- return;
-
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
-
- Node childNode = new Node(child, relvGlossWords);
-
- childNode.setHypernyms();
- wordSense
- .setScore(wordSense.getScore()
- + Math.pow(
- assessFeature(childNode.getHypernyms(),
- relvWords), intersectionExponent)
- / Math.pow(depth, depthScoreExponent));
- for (Synset hypernym : childNode.getHypernyms()) {
-
- fathomHypernymsExponential(wordSense, hypernym, relvGlossWords, depth - 1, maxDepth,
- intersectionExponent, depthScoreExponent);
- }
- }
-
- private void fathomHyponyms(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double depthScoreWeight) {
- if (depth == 0)
- return;
-
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
-
- Node childNode = new Node(child, relvGlossWords);
-
- childNode.setHyponyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getHyponyms(), relvWords));
- for (Synset hyponym : childNode.getHyponyms()) {
-
- fathomHyponyms(wordSense, hyponym, relvGlossWords, depth - 1, maxDepth,
- depthScoreWeight);
- }
- }
-
- private void fathomHyponymsExponential(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double intersectionExponent, double depthScoreExponent) {
- if (depth == 0)
- return;
-
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
-
- Node childNode = new Node(child, relvGlossWords);
-
- childNode.setHyponyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(
- assessFeature(childNode.getHyponyms(), relvWords),
- intersectionExponent)
- / Math.pow(depth, depthScoreExponent));
- for (Synset hyponym : childNode.getHyponyms()) {
-
- fathomHyponymsExponential(wordSense, hyponym, relvGlossWords, depth - 1, maxDepth,
- intersectionExponent, depthScoreExponent);
- }
- }
-
- private void fathomMeronyms(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double depthScoreWeight) {
- if (depth == 0)
- return;
-
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
-
- Node childNode = new Node(child, relvGlossWords);
-
- childNode.setMeronyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getMeronyms(), relvWords));
- for (Synset meronym : childNode.getMeronyms()) {
-
- fathomMeronyms(wordSense, meronym, relvGlossWords, depth - 1, maxDepth,
- depthScoreWeight);
- }
- }
-
- private void fathomMeronymsExponential(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double intersectionExponent, double depthScoreExponent) {
- if (depth == 0)
- return;
-
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
-
- Node childNode = new Node(child, relvGlossWords);
-
- childNode.setMeronyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(
- assessFeature(childNode.getMeronyms(), relvWords),
- intersectionExponent)
- / Math.pow(depth, depthScoreExponent));
- for (Synset meronym : childNode.getMeronyms()) {
-
- fathomMeronymsExponential(wordSense, meronym, relvGlossWords, depth - 1, maxDepth,
- intersectionExponent, depthScoreExponent);
- }
- }
-
- private void fathomHolonyms(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double depthScoreWeight) {
- if (depth == 0)
- return;
-
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
-
- Node childNode = new Node(child, relvGlossWords);
-
-
- childNode.setHolonyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(depthScoreWeight, maxDepth - depth + 1)
- * assessFeature(childNode.getHolonyms(), relvWords));
- for (Synset holonym : childNode.getHolonyms()) {
-
- fathomHolonyms(wordSense, holonym, relvGlossWords, depth - 1, maxDepth,
- depthScoreWeight);
- }
- }
-
- private void fathomHolonymsExponential(WordSense wordSense, Synset child,
- ArrayList<WordPOS> relvWords, int depth, int maxDepth,
- double intersectionExponent, double depthScoreExponent) {
- if (depth == 0)
- return;
-
- String[] tokenizedGloss = Loader.getTokenizer().tokenize(child.getGloss().toString());
- ArrayList<WordPOS> relvGlossWords = PreProcessor.getAllRelevantWords(tokenizedGloss);
-
- Node childNode = new Node(child, relvGlossWords);
-
- childNode.setHolonyms();
- wordSense.setScore(wordSense.getScore()
- + Math.pow(
- assessFeature(childNode.getHolonyms(), relvWords),
- intersectionExponent)
- / Math.pow(depth, depthScoreExponent));
- for (Synset holonym : childNode.getHolonyms()) {
-
- fathomHolonymsExponential(wordSense, holonym, relvGlossWords, depth - 1, maxDepth,
- intersectionExponent, depthScoreExponent);
- }
- }
-
- private int assessFeature(ArrayList<Synset> featureSynsets,
- ArrayList<WordPOS> relevantWords) {
- int count = 0;
- for (Synset synset : featureSynsets) {
- Node subNode = new Node(synset, relevantWords);
-
- String[] tokenizedSense = Loader.getTokenizer().tokenize(subNode.getSense());
- ArrayList<WordPOS> relvSenseWords = PreProcessor.getAllRelevantWords(tokenizedSense);
-
- for (WordPOS senseWord : relvSenseWords) {
- for (WordPOS sentenceWord : relevantWords) {
- if (sentenceWord.isStemEquivalent(senseWord)) {
- count = count + 1;
- }
- }
- }
- }
- return count;
- }
-
- private int assessSynonyms(ArrayList<WordPOS> synonyms,
- ArrayList<WordPOS> relevantWords) {
- int count = 0;
-
- for (WordPOS synonym : synonyms) {
- for (WordPOS sentenceWord : relevantWords) {
- // TODO try to switch to lemmatizer
- if (sentenceWord.isStemEquivalent(synonym)) {
- count = count + 1;
- }
- }
-
- }
-
- return count;
- }
-
- public ArrayList<WordSense> updateSenses(ArrayList<Node> nodes) {
-
- ArrayList<WordSense> scoredSenses = new ArrayList<WordSense>();
-
- for (int i=0; i< nodes.size(); i++ ) {
- ArrayList<WordPOS> sensesComponents = PreProcessor.getAllRelevantWords(PreProcessor.tokenize(nodes.get(i).getSense()));
- WordSense wordSense = new WordSense();
- nodes.get(i).setSenseRelevantWords(sensesComponents);
- wordSense.setNode(nodes.get(i));
- wordSense.setId(i);
- scoredSenses.add(wordSense);
- }
- return scoredSenses;
-
- }
-
- // disambiguates a WTDLesk and returns an array of sense indexes from WordNet ordered by their score
- @Override
- public String[] disambiguate(String[] inputText, int inputWordIndex) {
- WTDLesk wtd = new WTDLesk(inputText,inputWordIndex);
- ArrayList<WordSense> wsenses = null;
-
- switch(this.params.leskType){
- case LESK_BASIC:
- wsenses = basic(wtd);
- break;
- case LESK_BASIC_CTXT :
- wsenses = basicContextual(wtd);
- break;
- case LESK_BASIC_CTXT_WIN :
- wsenses = basicContextual(wtd, this.params.win_b_size);
- break;
- case LESK_BASIC_CTXT_WIN_BF :
- wsenses = basicContextual(wtd, this.params.win_b_size, this.params.win_f_size);
- break;
- case LESK_EXT :
- wsenses = extended(wtd,
- this.params.depth,
- this.params.depth_weight,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_CTXT :
- wsenses = extendedContextual(wtd,
- this.params.depth,
- this.params.depth_weight,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_CTXT_WIN :
- wsenses = extendedContextual(wtd,
- this.params.win_b_size,
- this.params.depth,
- this.params.depth_weight,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_CTXT_WIN_BF :
- wsenses = extendedContextual(wtd,
- this.params.win_b_size,
- this.params.win_f_size,
- this.params.depth,
- this.params.depth_weight,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_EXP :
- wsenses = extendedExponential(wtd,
- this.params.depth,
- this.params.iexp,
- this.params.dexp,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_EXP_CTXT :
- wsenses = extendedExponentialContextual(wtd,
- this.params.depth,
- this.params.iexp,
- this.params.dexp,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_EXP_CTXT_WIN :
- wsenses = extendedExponentialContextual(wtd,
- this.params.win_b_size,
- this.params.depth,
- this.params.iexp,
- this.params.dexp,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- case LESK_EXT_EXP_CTXT_WIN_BF :
- wsenses = extendedExponentialContextual(wtd,
- this.params.win_b_size,
- this.params.win_f_size,
- this.params.depth,
- this.params.iexp,
- this.params.dexp,
- this.params.fathom_synonyms,
- this.params.fathom_hypernyms,
- this.params.fathom_hyponyms,
- this.params.fathom_meronyms,
- this.params.fathom_holonyms);
- break;
- }
-
- wsenses = extendedExponentialContextual(wtd, LeskParameters.DFLT_WIN_SIZE,LeskParameters.DFLT_DEPTH,LeskParameters.DFLT_IEXP,LeskParameters.DFLT_DEXP, true,true,true,true,true );
- Collections.sort(wsenses);
-
- String[] senses = new String[wsenses.size()];
- for (int i = 0; i < wsenses.size() ; i++) {
- senses[i] = wsenses.get(i).getSense();
- }
- return senses;
- }
-
- @Override
- public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
- // TODO need to work on spans
- return null;
- }
+ public Lesk() {
+ this(null);
+ }
+
+ public Lesk(LeskParameters params) throws InvalidParameterException {
+ loader = new Loader();
+ this.setParams(params);
+ }
+
+ public void setParams(LeskParameters params) throws InvalidParameterException {
+ if (params == null) {
+ this.params = new LeskParameters();
+ } else {
+ if (params.isValid()) {
+ this.params = params;
+ } else {
+ throw new InvalidParameterException("wrong params");
+ }
+ }
+ }
+
+ public ArrayList<WordSense> basic(WTDLesk wtd) {
+
+ ArrayList<WordPOS> relvWords = PreProcessor.getAllRelevantWords(wtd);
+ WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+
+ ArrayList<Synset> synsets = word.getSynsets();
+ ArrayList<Node> nodes = new ArrayList<Node>();
+
+ for (Synset synset : synsets) {
+ Node node = new Node(synset, relvWords);
+ nodes.add(node);
+ }
+
+ ArrayList<WordSense> scoredSenses = updateSenses(nodes);
+
+ for (WordSense wordSense : scoredSenses) {
+ wordSense.setWTDLesk(wtd);
+ int count = 0;
+ for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
+ ArrayList stems = (ArrayList) PreProcessor.Stem(senseWordPOS);
+ for (WordPOS sentenceWordPOS : relvWords) {
+ // TODO change to lemma check
+ if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
+ count = count + 1;
+ }
+ }
+ }
+ wordSense.setScore(count);
+ }
+
+ return scoredSenses;
+ }
+
+ public ArrayList<WordSense> basicContextual(WTDLesk wtd) {
+ return this.basicContextual(wtd, LeskParameters.DFLT_WIN_SIZE);
+ }
+
+ public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowSize) {
+ return this.basicContextual(wtd, windowSize, windowSize);
+ }
+
+ public ArrayList<WordSense> basicContextual(WTDLesk wtd, int windowBackward,
+ int windowForward) {
+
+ ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd,
+ windowBackward, windowForward);
+ WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+
+ ArrayList<Synset> synsets = word.getSynsets();
+ ArrayList<Node> nodes = new ArrayList<Node>();
+
+ for (Synset synset : synsets) {
+ Node node = new Node(synset, relvWords);
+ nodes.add(node);
+ }
+
+ ArrayList<WordSense> scoredSenses = updateSenses(nodes);
+
+ for (WordSense wordSense : scoredSenses) {
+ wordSense.setWTDLesk(wtd);
+
+ int count = 0;
+ for (WordPOS senseWordPOS : wordSense.getNode().getSenseRelevantWords()) {
+
+ for (WordPOS sentenceWordPOS : relvWords) {
+ // TODO change to lemma check
+ if (sentenceWordPOS.isStemEquivalent(senseWordPOS)) {
+ count = count + 1;
+ }
+ }
+
+ }
+ wordSense.setScore(count);
+
+ }
+
+ Collections.sort(scoredSenses);
+
+ return scoredSenses;
+ }
+
+ public ArrayList<WordSense> extended(WTDLesk wtd, int depth,
+ double depthScoreWeight, boolean includeSynonyms,
+ boolean includeHypernyms, boolean includeHyponyms,
+ boolean includeMeronyms, boolean includeHolonyms) {
+
+ return extendedContextual(wtd, 0, depth, depthScoreWeight, includeSynonyms,
+ includeHypernyms, includeHyponyms, includeMeronyms, includeHolonyms);
+
+ }
+
+ public ArrayList<WordSense> extendedContextual(WTDLesk wtd, int depth,
+ double depthScoreWeight, boolean includeSynonyms,
+ boolean includeHypernyms, boolean includeHyponyms,
+ boolean includeMeronyms, boolean includeHolonyms) {
+
+ return extendedContextual(wtd, LeskParameters.DFLT_WIN_SIZE, depth,
+ depthScoreWeight, includeSynonyms, includeHypernyms, includeHyponyms,
+ includeMeronyms, includeHolonyms);
+
+ }
+
+ public ArrayList<WordSense> extendedContextual(WTDLesk wtd, int windowSize,
+ int depth, double depthScoreWeight, boolean includeSynonyms,
+ boolean includeHypernyms, boolean includeHyponyms,
+ boolean includeMeronyms, boolean includeHolonyms) {
+
+ return extendedContextual(wtd, windowSize, windowSize, depth,
+ depthScoreWeight, includeSynonyms, includeHypernyms, includeHyponyms,
+ includeMeronyms, includeHolonyms);
+ }
+
+ public ArrayList<WordSense> extendedContextual(WTDLesk wtd,
+ int windowBackward, int windowForward, int depth,
+ double depthScoreWeight, boolean includeSynonyms,
+ boolean includeHypernyms, boolean includeHyponyms,
+ boolean includeMeronyms, boolean includeHolonyms) {
+
+ ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd,
+ windowBackward, windowForward);
+ WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+
+ ArrayList<Synset> synsets = word.getSynsets();
+ ArrayList<Node> nodes = new ArrayList<Node>();
+
+ for (Synset synset : synsets) {
+ Node node = new Node(synset, relvWords);
+ nodes.add(node);
+ }
+
+ ArrayList<WordSense> scoredSenses = basicContextual(wtd, windowBackward,
+ windowForward);
+
+ for (WordSense wordSense : scoredSenses) {
+
+ if (includeSynonyms) {
+ wordSense.setScore(wordSense.getScore() + depthScoreWeight
+ * assessSynonyms(wordSense.getNode().getSynonyms(), relvWords));
+ }
+
+ if (includeHypernyms) {
+ fathomHypernyms(wordSense, wordSense.getNode().synset, relvWords,
+ depth, depth, depthScoreWeight);
+ }
+
+ if (includeHyponyms) {
+
+ fathomHyponyms(wordSense, wordSense.getNode().synset, relvWords, depth,
+ depth, depthScoreWeight);
+ }
+
+ if (includeMeronyms) {
+
+ fathomMeronyms(wordSense, wordSense.getNode().synset, relvWords, depth,
+ depth, depthScoreWeight);
+
+ }
+
+ if (includeHolonyms) {
+
+ fathomHolonyms(wordSense, wordSense.getNode().synset, relvWords, depth,
+ depth, depthScoreWeight);
+
+ }
+
+ }
+
+ return scoredSenses;
+
+ }
+
+ public ArrayList<WordSense> extendedExponential(WTDLesk wtd, int depth,
+ double intersectionExponent, double depthExponent,
+ boolean includeSynonyms, boolean includeHypernyms,
+ boolean includeHyponyms, boolean includeMeronyms, boolean includeHolonyms) {
+
+ return extendedExponentialContextual(wtd, 0, depth, intersectionExponent,
+ depthExponent, includeSynonyms, includeHypernyms, includeHyponyms,
+ includeMeronyms, includeHolonyms);
+
+ }
+
+ public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
+ int depth, double intersectionExponent, double depthExponent,
+ boolean includeSynonyms, boolean includeHypernyms,
+ boolean includeHyponyms, boolean includeMeronyms, boolean includeHolonyms) {
+
+ return extendedExponentialContextual(wtd, LeskParameters.DFLT_WIN_SIZE,
+ depth, intersectionExponent, depthExponent, includeSynonyms,
+ includeHypernyms, includeHyponyms, includeMeronyms, includeHolonyms);
+ }
+
+ public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
+ int windowSize, int depth, double intersectionExponent,
+ double depthExponent, boolean includeSynonyms, boolean includeHypernyms,
+ boolean includeHyponyms, boolean includeMeronyms, boolean includeHolonyms) {
+
+ return extendedExponentialContextual(wtd, windowSize, windowSize, depth,
+ intersectionExponent, depthExponent, includeSynonyms, includeHypernyms,
+ includeHyponyms, includeMeronyms, includeHolonyms);
+ }
+
+ public ArrayList<WordSense> extendedExponentialContextual(WTDLesk wtd,
+ int windowBackward, int windowForward, int depth,
+ double intersectionExponent, double depthExponent,
+ boolean includeSynonyms, boolean includeHypernyms,
+ boolean includeHyponyms, boolean includeMeronyms, boolean includeHolonyms) {
+ ArrayList<WordPOS> relvWords = PreProcessor.getRelevantWords(wtd,
+ windowBackward, windowForward);
+ WordPOS word = new WordPOS(wtd.getWord(), Constants.getPOS(wtd.getPosTag()));
+
+ ArrayList<Synset> synsets = word.getSynsets();
+ ArrayList<Node> nodes = new ArrayList<Node>();
+
+ for (Synset synset : synsets) {
+ Node node = new Node(synset, relvWords);
+ nodes.add(node);
+ }
+
+ ArrayList<WordSense> scoredSenses = basicContextual(wtd, windowForward,
+ windowBackward);
+
+ for (WordSense wordSense : scoredSenses) {
+
+ if (includeSynonyms) {
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(
+ assessSynonyms(wordSense.getNode().getSynonyms(), relvWords),
+ intersectionExponent));
+ }
+
+ if (includeHypernyms) {
+ fathomHypernymsExponential(wordSense, wordSense.getNode().synset,
+ relvWords, depth, depth, intersectionExponent, depthExponent);
+ }
+
+ if (includeHyponyms) {
+
+ fathomHyponymsExponential(wordSense, wordSense.getNode().synset,
+ relvWords, depth, depth, intersectionExponent, depthExponent);
+ }
+
+ if (includeMeronyms) {
+
+ fathomMeronymsExponential(wordSense, wordSense.getNode().synset,
+ relvWords, depth, depth, intersectionExponent, depthExponent);
+
+ }
+
+ if (includeHolonyms) {
+
+ fathomHolonymsExponential(wordSense, wordSense.getNode().synset,
+ relvWords, depth, depth, intersectionExponent, depthExponent);
+
+ }
+
+ }
+
+ return scoredSenses;
+
+ }
+
+ private void fathomHypernyms(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double depthScoreWeight) {
+ if (depth == 0)
+ return;
+
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
+
+ Node childNode = new Node(child, relvGlossWords);
+
+ childNode.setHypernyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getHypernyms(), relvWords));
+ for (Synset hypernym : childNode.getHypernyms()) {
+ fathomHypernyms(wordSense, hypernym, relvGlossWords, depth - 1, maxDepth,
+ depthScoreWeight);
+ }
+ }
+
+ private void fathomHypernymsExponential(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double intersectionExponent, double depthScoreExponent) {
+ if (depth == 0)
+ return;
+
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
+
+ Node childNode = new Node(child, relvGlossWords);
+
+ childNode.setHypernyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(assessFeature(childNode.getHypernyms(), relvWords),
+ intersectionExponent) / Math.pow(depth, depthScoreExponent));
+ for (Synset hypernym : childNode.getHypernyms()) {
+
+ fathomHypernymsExponential(wordSense, hypernym, relvGlossWords,
+ depth - 1, maxDepth, intersectionExponent, depthScoreExponent);
+ }
+ }
+
+ /**
+  * Recursively accumulates into {@code wordSense} the gloss overlap between
+  * the context words and the hyponym tree of {@code child}, damping each
+  * level by {@code depthScoreWeight^(maxDepth - depth + 1)}.
+  *
+  * @param wordSense sense whose running score is updated
+  * @param child synset whose hyponyms are explored
+  * @param relvWords relevant words matched against the hyponym glosses
+  * @param depth remaining recursion depth; 0 stops the descent
+  * @param maxDepth initial depth, used to compute the current level number
+  * @param depthScoreWeight base of the per-level damping factor
+  */
+ private void fathomHyponyms(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double depthScoreWeight) {
+ if (depth == 0)
+ return;
+
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
+
+ Node childNode = new Node(child, relvGlossWords);
+
+ childNode.setHyponyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getHyponyms(), relvWords));
+ // NOTE(review): recursion uses the gloss words as the new context.
+ for (Synset hyponym : childNode.getHyponyms()) {
+
+ fathomHyponyms(wordSense, hyponym, relvGlossWords, depth - 1, maxDepth,
+ depthScoreWeight);
+ }
+ }
+
+ /**
+  * Exponential variant of {@link #fathomHyponyms}: adds
+  * {@code overlap^intersectionExponent / depth^depthScoreExponent} per level.
+  *
+  * @param wordSense sense whose running score is updated
+  * @param child synset whose hyponyms are explored
+  * @param relvWords relevant words matched against the hyponym glosses
+  * @param depth remaining recursion depth; 0 stops the descent
+  * @param maxDepth initial depth; NOTE(review): unused in this variant
+  * @param intersectionExponent exponent applied to the raw overlap count
+  * @param depthScoreExponent exponent applied to the depth divisor
+  */
+ private void fathomHyponymsExponential(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double intersectionExponent, double depthScoreExponent) {
+ if (depth == 0)
+ return;
+
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
+
+ Node childNode = new Node(child, relvGlossWords);
+
+ childNode.setHyponyms();
+ // NOTE(review): divisor shrinks as depth decreases -- deeper levels weigh
+ // more; confirm intended.
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(assessFeature(childNode.getHyponyms(), relvWords),
+ intersectionExponent) / Math.pow(depth, depthScoreExponent));
+ for (Synset hyponym : childNode.getHyponyms()) {
+
+ fathomHyponymsExponential(wordSense, hyponym, relvGlossWords, depth - 1,
+ maxDepth, intersectionExponent, depthScoreExponent);
+ }
+ }
+
+ /**
+  * Recursively accumulates into {@code wordSense} the gloss overlap between
+  * the context words and the meronym tree of {@code child}, damping each
+  * level by {@code depthScoreWeight^(maxDepth - depth + 1)}.
+  *
+  * @param wordSense sense whose running score is updated
+  * @param child synset whose meronyms are explored
+  * @param relvWords relevant words matched against the meronym glosses
+  * @param depth remaining recursion depth; 0 stops the descent
+  * @param maxDepth initial depth, used to compute the current level number
+  * @param depthScoreWeight base of the per-level damping factor
+  */
+ private void fathomMeronyms(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double depthScoreWeight) {
+ if (depth == 0)
+ return;
+
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
+
+ Node childNode = new Node(child, relvGlossWords);
+
+ childNode.setMeronyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getMeronyms(), relvWords));
+ // NOTE(review): recursion uses the gloss words as the new context.
+ for (Synset meronym : childNode.getMeronyms()) {
+
+ fathomMeronyms(wordSense, meronym, relvGlossWords, depth - 1, maxDepth,
+ depthScoreWeight);
+ }
+ }
+
+ /**
+  * Exponential variant of {@link #fathomMeronyms}: adds
+  * {@code overlap^intersectionExponent / depth^depthScoreExponent} per level.
+  *
+  * @param wordSense sense whose running score is updated
+  * @param child synset whose meronyms are explored
+  * @param relvWords relevant words matched against the meronym glosses
+  * @param depth remaining recursion depth; 0 stops the descent
+  * @param maxDepth initial depth; NOTE(review): unused in this variant
+  * @param intersectionExponent exponent applied to the raw overlap count
+  * @param depthScoreExponent exponent applied to the depth divisor
+  */
+ private void fathomMeronymsExponential(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double intersectionExponent, double depthScoreExponent) {
+ if (depth == 0)
+ return;
+
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
+
+ Node childNode = new Node(child, relvGlossWords);
+
+ childNode.setMeronyms();
+ // NOTE(review): divisor shrinks as depth decreases -- deeper levels weigh
+ // more; confirm intended.
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(assessFeature(childNode.getMeronyms(), relvWords),
+ intersectionExponent) / Math.pow(depth, depthScoreExponent));
+ for (Synset meronym : childNode.getMeronyms()) {
+
+ fathomMeronymsExponential(wordSense, meronym, relvGlossWords, depth - 1,
+ maxDepth, intersectionExponent, depthScoreExponent);
+ }
+ }
+
+ /**
+  * Recursively accumulates into {@code wordSense} the gloss overlap between
+  * the context words and the holonym tree of {@code child}, damping each
+  * level by {@code depthScoreWeight^(maxDepth - depth + 1)}.
+  *
+  * @param wordSense sense whose running score is updated
+  * @param child synset whose holonyms are explored
+  * @param relvWords relevant words matched against the holonym glosses
+  * @param depth remaining recursion depth; 0 stops the descent
+  * @param maxDepth initial depth, used to compute the current level number
+  * @param depthScoreWeight base of the per-level damping factor
+  */
+ private void fathomHolonyms(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double depthScoreWeight) {
+ if (depth == 0)
+ return;
+
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
+
+ Node childNode = new Node(child, relvGlossWords);
+
+ childNode.setHolonyms();
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(depthScoreWeight, maxDepth - depth + 1)
+ * assessFeature(childNode.getHolonyms(), relvWords));
+ // NOTE(review): recursion uses the gloss words as the new context.
+ for (Synset holonym : childNode.getHolonyms()) {
+
+ fathomHolonyms(wordSense, holonym, relvGlossWords, depth - 1, maxDepth,
+ depthScoreWeight);
+ }
+ }
+
+ /**
+  * Exponential variant of {@link #fathomHolonyms}: adds
+  * {@code overlap^intersectionExponent / depth^depthScoreExponent} per level.
+  *
+  * @param wordSense sense whose running score is updated
+  * @param child synset whose holonyms are explored
+  * @param relvWords relevant words matched against the holonym glosses
+  * @param depth remaining recursion depth; 0 stops the descent
+  * @param maxDepth initial depth; NOTE(review): unused in this variant
+  * @param intersectionExponent exponent applied to the raw overlap count
+  * @param depthScoreExponent exponent applied to the depth divisor
+  */
+ private void fathomHolonymsExponential(WordSense wordSense, Synset child,
+ ArrayList<WordPOS> relvWords, int depth, int maxDepth,
+ double intersectionExponent, double depthScoreExponent) {
+ if (depth == 0)
+ return;
+
+ String[] tokenizedGloss = Loader.getTokenizer().tokenize(
+ child.getGloss().toString());
+ ArrayList<WordPOS> relvGlossWords = PreProcessor
+ .getAllRelevantWords(tokenizedGloss);
+
+ Node childNode = new Node(child, relvGlossWords);
+
+ childNode.setHolonyms();
+ // NOTE(review): divisor shrinks as depth decreases -- deeper levels weigh
+ // more; confirm intended.
+ wordSense.setScore(wordSense.getScore()
+ + Math.pow(assessFeature(childNode.getHolonyms(), relvWords),
+ intersectionExponent) / Math.pow(depth, depthScoreExponent));
+ for (Synset holonym : childNode.getHolonyms()) {
+
+ fathomHolonymsExponential(wordSense, holonym, relvGlossWords, depth - 1,
+ maxDepth, intersectionExponent, depthScoreExponent);
+ }
+ }
+
+ /**
+  * Counts stem-equivalent word pairs between the sense text of each feature
+  * synset and the relevant context words.
+  *
+  * @param featureSynsets synsets of one semantic relation (e.g. hypernyms)
+  * @param relevantWords relevant words from the context being matched
+  * @return total number of matching (senseWord, sentenceWord) pairs;
+  *         repeated words are counted once per pair
+  */
+ private int assessFeature(ArrayList<Synset> featureSynsets,
+ ArrayList<WordPOS> relevantWords) {
+ int count = 0;
+ for (Synset synset : featureSynsets) {
+ Node subNode = new Node(synset, relevantWords);
+
+ // Tokenize this synset's sense text and keep the relevant words only.
+ String[] tokenizedSense = Loader.getTokenizer().tokenize(
+ subNode.getSense());
+ ArrayList<WordPOS> relvSenseWords = PreProcessor
+ .getAllRelevantWords(tokenizedSense);
+
+ // Pairwise stem comparison: O(|sense words| x |context words|).
+ for (WordPOS senseWord : relvSenseWords) {
+ for (WordPOS sentenceWord : relevantWords) {
+ if (sentenceWord.isStemEquivalent(senseWord)) {
+ count = count + 1;
+ }
+ }
+ }
+ }
+ return count;
+ }
+
+ /**
+  * Counts stem-equivalent pairs between a list of synonyms and the relevant
+  * context words.
+  *
+  * @param synonyms synonym words of the sense under evaluation
+  * @param relevantWords relevant words from the context being matched
+  * @return total number of matching (synonym, sentenceWord) pairs
+  */
+ private int assessSynonyms(ArrayList<WordPOS> synonyms,
+ ArrayList<WordPOS> relevantWords) {
+ int count = 0;
+
+ for (WordPOS synonym : synonyms) {
+ for (WordPOS sentenceWord : relevantWords) {
+ // TODO try to switch to lemmatizer
+ if (sentenceWord.isStemEquivalent(synonym)) {
+ count = count + 1;
+ }
+ }
+
+ }
+
+ return count;
+ }
+
+ /**
+  * Wraps each candidate sense node in a {@link WordSense} ready for scoring.
+  * Also stores the relevant words of each node's sense text on the node.
+  *
+  * @param nodes candidate sense nodes for the target word
+  * @return one WordSense per node, with id set to the node's list index
+  */
+ public ArrayList<WordSense> updateSenses(ArrayList<Node> nodes) {
+
+ ArrayList<WordSense> scoredSenses = new ArrayList<WordSense>();
+
+ for (int i = 0; i < nodes.size(); i++) {
+ // Relevant words of this sense's text, cached on the node for later use.
+ ArrayList<WordPOS> sensesComponents = PreProcessor
+ .getAllRelevantWords(PreProcessor.tokenize(nodes.get(i).getSense()));
+ WordSense wordSense = new WordSense();
+ nodes.get(i).setSenseRelevantWords(sensesComponents);
+ wordSense.setNode(nodes.get(i));
+ wordSense.setId(i);
+ scoredSenses.add(wordSense);
+ }
+ return scoredSenses;
+
+ }
+
+ // disambiguates a WTDLesk and returns an array of sense indexes from WordNet
+ // ordered by their score
+ /**
+  * Disambiguates the word at {@code inputWordIndex} in {@code inputText}
+  * using the Lesk variant selected in {@link #params}.
+  *
+  * @param inputText tokenized sentence containing the target word
+  * @param inputWordIndex index of the target word in {@code inputText}
+  * @return sense identifiers ordered by score; NOTE(review): ordering relies
+  *         on WordSense.compareTo -- verify it sorts best-first
+  */
+ @Override
+ public String[] disambiguate(String[] inputText, int inputWordIndex) {
+ WTDLesk wtd = new WTDLesk(inputText, inputWordIndex);
+ ArrayList<WordSense> wsenses = null;
+
+ // Dispatch to the variant selected in the (already validated) parameters.
+ switch (this.params.leskType) {
+ case LESK_BASIC:
+ wsenses = basic(wtd);
+ break;
+ case LESK_BASIC_CTXT:
+ wsenses = basicContextual(wtd);
+ break;
+ case LESK_BASIC_CTXT_WIN:
+ wsenses = basicContextual(wtd, this.params.win_b_size);
+ break;
+ case LESK_BASIC_CTXT_WIN_BF:
+ wsenses = basicContextual(wtd, this.params.win_b_size,
+ this.params.win_f_size);
+ break;
+ case LESK_EXT:
+ wsenses = extended(wtd, this.params.depth, this.params.depth_weight,
+ this.params.fathom_synonyms, this.params.fathom_hypernyms,
+ this.params.fathom_hyponyms, this.params.fathom_meronyms,
+ this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_CTXT:
+ wsenses = extendedContextual(wtd, this.params.depth,
+ this.params.depth_weight, this.params.fathom_synonyms,
+ this.params.fathom_hypernyms, this.params.fathom_hyponyms,
+ this.params.fathom_meronyms, this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_CTXT_WIN:
+ wsenses = extendedContextual(wtd, this.params.win_b_size,
+ this.params.depth, this.params.depth_weight,
+ this.params.fathom_synonyms, this.params.fathom_hypernyms,
+ this.params.fathom_hyponyms, this.params.fathom_meronyms,
+ this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_CTXT_WIN_BF:
+ wsenses = extendedContextual(wtd, this.params.win_b_size,
+ this.params.win_f_size, this.params.depth, this.params.depth_weight,
+ this.params.fathom_synonyms, this.params.fathom_hypernyms,
+ this.params.fathom_hyponyms, this.params.fathom_meronyms,
+ this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_EXP:
+ wsenses = extendedExponential(wtd, this.params.depth, this.params.iexp,
+ this.params.dexp, this.params.fathom_synonyms,
+ this.params.fathom_hypernyms, this.params.fathom_hyponyms,
+ this.params.fathom_meronyms, this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_EXP_CTXT:
+ wsenses = extendedExponentialContextual(wtd, this.params.depth,
+ this.params.iexp, this.params.dexp, this.params.fathom_synonyms,
+ this.params.fathom_hypernyms, this.params.fathom_hyponyms,
+ this.params.fathom_meronyms, this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_EXP_CTXT_WIN:
+ wsenses = extendedExponentialContextual(wtd, this.params.win_b_size,
+ this.params.depth, this.params.iexp, this.params.dexp,
+ this.params.fathom_synonyms, this.params.fathom_hypernyms,
+ this.params.fathom_hyponyms, this.params.fathom_meronyms,
+ this.params.fathom_holonyms);
+ break;
+ case LESK_EXT_EXP_CTXT_WIN_BF:
+ wsenses = extendedExponentialContextual(wtd, this.params.win_b_size,
+ this.params.win_f_size, this.params.depth, this.params.iexp,
+ this.params.dexp, this.params.fathom_synonyms,
+ this.params.fathom_hypernyms, this.params.fathom_hyponyms,
+ this.params.fathom_meronyms, this.params.fathom_holonyms);
+ break;
+ }
+
+ // FIX: removed an unconditional extendedExponentialContextual(...) call
+ // with default parameters that overwrote the result of the switch above,
+ // making params.leskType a no-op (leftover debug code).
+ Collections.sort(wsenses);
+
+ String[] senses = new String[wsenses.size()];
+ for (int i = 0; i < wsenses.size(); i++) {
+ senses[i] = wsenses.get(i).getSense();
+ }
+ return senses;
+ }
+
+ /**
+  * Span-based disambiguation -- not implemented yet.
+  *
+  * @return always {@code null} until span support is added
+  */
+ @Override
+ public String[] disambiguate(String[] inputText, Span[] inputWordSpans) {
+ // TODO need to work on spans
+ return null;
+ }
-}
\ No newline at end of file
+}
Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java Thu Jun 25 09:20:30 2015
@@ -2,106 +2,84 @@ package opennlp.tools.disambiguator.lesk
public class LeskParameters {
- // VARIATIONS
- public static enum LESK_TYPE {
- LESK_BASIC,
- LESK_BASIC_CTXT,
- LESK_BASIC_CTXT_WIN,
- LESK_BASIC_CTXT_WIN_BF,
- LESK_EXT,
- LESK_EXT_CTXT,
- LESK_EXT_CTXT_WIN,
- LESK_EXT_CTXT_WIN_BF,
- LESK_EXT_EXP,
- LESK_EXT_EXP_CTXT,
- LESK_EXT_EXP_CTXT_WIN,
- LESK_EXT_EXP_CTXT_WIN_BF,
- }
-
- // DEFAULTS
- protected static final LESK_TYPE DFLT_LESK_TYPE = LESK_TYPE.LESK_EXT_EXP_CTXT_WIN;
- protected static final int DFLT_WIN_SIZE = 4;
- protected static final int DFLT_DEPTH = 3;
- protected static final double DFLT_IEXP = 0.3;
- protected static final double DFLT_DEXP = 0.3;
-
-
- public LESK_TYPE leskType;
- public int win_f_size;
- public int win_b_size;
- public int depth;
-
- public boolean fathom_synonyms;
- public boolean fathom_hypernyms;
- public boolean fathom_hyponyms;
- public boolean fathom_meronyms;
- public boolean fathom_holonyms;
-
- public double depth_weight;
- public double iexp;
- public double dexp;
-
-
- public LeskParameters(){
- this.setDefaults();
- }
-
- public void setDefaults(){
- this.leskType = LeskParameters.DFLT_LESK_TYPE;
- this.win_f_size = LeskParameters.DFLT_WIN_SIZE;
- this.win_b_size = LeskParameters.DFLT_WIN_SIZE;
- this.depth = LeskParameters.DFLT_DEPTH;
- this.iexp = LeskParameters.DFLT_IEXP;
- this.dexp = LeskParameters.DFLT_DEXP;
- this.fathom_holonyms = true;
- this.fathom_hypernyms = true;
- this.fathom_hyponyms = true;
- this.fathom_meronyms = true;
- this.fathom_synonyms = true;
- }
-
- // Parameter Validation
- // TODO make isSet for semantic feature booleans
- public boolean isValid(){
-
- switch(this.leskType){
- case LESK_BASIC:
- case LESK_BASIC_CTXT :
- return true;
- case LESK_BASIC_CTXT_WIN :
- return (this.win_b_size==this.win_f_size)
- && this.win_b_size>=0 ;
- case LESK_BASIC_CTXT_WIN_BF :
- return (this.win_b_size>=0)
- && (this.win_f_size>=0) ;
- case LESK_EXT :
- case LESK_EXT_CTXT :
- return (this.depth>=0)
- && (this.depth_weight >= 0);
-
- case LESK_EXT_CTXT_WIN :
- case LESK_EXT_CTXT_WIN_BF :
- return (this.depth>=0)
- && (this.depth_weight >= 0)
- && (this.win_b_size>=0)
- && (this.win_f_size>=0);
-
- case LESK_EXT_EXP :
- case LESK_EXT_EXP_CTXT :
- return (this.depth>=0)
- && (this.dexp >= 0)
- && (this.iexp>=0) ;
-
- case LESK_EXT_EXP_CTXT_WIN :
- case LESK_EXT_EXP_CTXT_WIN_BF :
- return (this.depth>=0)
- && (this.dexp >= 0)
- && (this.iexp>=0)
- && (this.win_b_size>=0)
- && (this.win_f_size>=0);
- default :
- return false;
- }
- }
+ // VARIATIONS
+ // Each constant selects one Lesk variant dispatched in Lesk.disambiguate():
+ // CTXT = contextual, WIN = symmetric window, WIN_BF = backward/forward
+ // windows, EXT = extended (WordNet relations), EXP = exponential scoring.
+ public static enum LESK_TYPE {
+ LESK_BASIC, LESK_BASIC_CTXT, LESK_BASIC_CTXT_WIN, LESK_BASIC_CTXT_WIN_BF, LESK_EXT, LESK_EXT_CTXT, LESK_EXT_CTXT_WIN, LESK_EXT_CTXT_WIN_BF, LESK_EXT_EXP, LESK_EXT_EXP_CTXT, LESK_EXT_EXP_CTXT_WIN, LESK_EXT_EXP_CTXT_WIN_BF,
+ }
+
+ // DEFAULTS
+ protected static final LESK_TYPE DFLT_LESK_TYPE = LESK_TYPE.LESK_EXT_EXP_CTXT_WIN;
+ protected static final int DFLT_WIN_SIZE = 4;
+ protected static final int DFLT_DEPTH = 3;
+ protected static final double DFLT_IEXP = 0.3;
+ protected static final double DFLT_DEXP = 0.3;
+
+ public LESK_TYPE leskType; // selected algorithm variant
+ public int win_f_size; // forward context window size
+ public int win_b_size; // backward context window size
+ public int depth; // recursion depth for the extended variants
+
+ // Which WordNet relations the extended variants explore.
+ public boolean fathom_synonyms;
+ public boolean fathom_hypernyms;
+ public boolean fathom_hyponyms;
+ public boolean fathom_meronyms;
+ public boolean fathom_holonyms;
+
+ public double depth_weight; // per-level damping base for LESK_EXT* variants
+ public double iexp; // intersection exponent for LESK_EXT_EXP* variants
+ public double dexp; // depth exponent for LESK_EXT_EXP* variants
+
+ /** Creates a parameter set initialized to the defaults. */
+ public LeskParameters() {
+ this.setDefaults();
+ }
+
+ /**
+  * Resets every parameter to its default value.
+  */
+ public void setDefaults() {
+ this.leskType = LeskParameters.DFLT_LESK_TYPE;
+ this.win_f_size = LeskParameters.DFLT_WIN_SIZE;
+ this.win_b_size = LeskParameters.DFLT_WIN_SIZE;
+ this.depth = LeskParameters.DFLT_DEPTH;
+ // FIX: depth_weight was never defaulted, leaving it 0.0. The LESK_EXT*
+ // variants weight each tree level by depth_weight^level, so a zero base
+ // silently nullified every fathom score (and isValid() only checks >= 0).
+ // 1.0 means "no damping" and keeps those variants functional; tune lower
+ // to penalize deeper levels.
+ this.depth_weight = 1.0;
+ this.iexp = LeskParameters.DFLT_IEXP;
+ this.dexp = LeskParameters.DFLT_DEXP;
+ this.fathom_holonyms = true;
+ this.fathom_hypernyms = true;
+ this.fathom_hyponyms = true;
+ this.fathom_meronyms = true;
+ this.fathom_synonyms = true;
+ }
+
+ // Parameter Validation
+ // TODO make isSet for semantic feature booleans
+ /**
+  * Checks that the parameters required by the selected variant are set to
+  * sensible (non-negative) values.
+  *
+  * @return {@code true} if this parameter set is usable for {@code leskType}
+  */
+ public boolean isValid() {
+
+ switch (this.leskType) {
+ case LESK_BASIC:
+ case LESK_BASIC_CTXT:
+ return true;
+ case LESK_BASIC_CTXT_WIN:
+ // Symmetric-window variant: both windows must be equal and non-negative.
+ return (this.win_b_size == this.win_f_size) && this.win_b_size >= 0;
+ case LESK_BASIC_CTXT_WIN_BF:
+ return (this.win_b_size >= 0) && (this.win_f_size >= 0);
+ case LESK_EXT:
+ case LESK_EXT_CTXT:
+ return (this.depth >= 0) && (this.depth_weight >= 0);
+
+ case LESK_EXT_CTXT_WIN:
+ case LESK_EXT_CTXT_WIN_BF:
+ return (this.depth >= 0) && (this.depth_weight >= 0)
+ && (this.win_b_size >= 0) && (this.win_f_size >= 0);
+
+ case LESK_EXT_EXP:
+ case LESK_EXT_EXP_CTXT:
+ return (this.depth >= 0) && (this.dexp >= 0) && (this.iexp >= 0);
+
+ case LESK_EXT_EXP_CTXT_WIN:
+ case LESK_EXT_EXP_CTXT_WIN_BF:
+ return (this.depth >= 0) && (this.dexp >= 0) && (this.iexp >= 0)
+ && (this.win_b_size >= 0) && (this.win_f_size >= 0);
+ default:
+ // Unreachable while leskType is a LESK_TYPE, but required by javac.
+ return false;
+ }
+ }
}
+
Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java Thu Jun 25 09:20:30 2015
@@ -2,14 +2,9 @@ package opennlp.tools.disambiguator.lesk
import opennlp.tools.disambiguator.WordToDisambiguate;
+/**
+ * Word-to-disambiguate wrapper used by the Lesk disambiguator.
+ */
+public class WTDLesk extends WordToDisambiguate {
-public class WTDLesk extends WordToDisambiguate{
-
- public WTDLesk(String[] sentence, int wordIndex) {
- super(sentence,wordIndex,-1);
- }
-
-
-
-
-}
\ No newline at end of file
+ /**
+  * @param sentence tokenized sentence containing the target word
+  * @param wordIndex index of the target word in {@code sentence}
+  */
+ public WTDLesk(String[] sentence, int wordIndex) {
+ // NOTE(review): meaning of the third super argument (-1) is not visible
+ // here -- confirm against WordToDisambiguate.
+ super(sentence, wordIndex, -1);
+ }
+}
Modified: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java?rev=1687455&r1=1687454&r2=1687455&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java Thu Jun 25 09:20:30 2015
@@ -1,19 +1,12 @@
package opennlp.tools.disambiguator;
+
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
-import java.util.ArrayList;
-
-import org.junit.Test;
import opennlp.tools.cmdline.postag.POSModelLoader;
-import opennlp.tools.disambiguator.Constants;
-import opennlp.tools.disambiguator.Loader;
-import opennlp.tools.disambiguator.WordSense;
-import opennlp.tools.disambiguator.ims.FeaturesExtractor;
import opennlp.tools.disambiguator.lesk.Lesk;
import opennlp.tools.disambiguator.lesk.LeskParameters;
-import opennlp.tools.disambiguator.lesk.WTDLesk;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTagger;
import opennlp.tools.postag.POSTaggerME;
@@ -21,63 +14,63 @@ import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
+import org.junit.Test;
public class Tester {
- @Test
- public static void main(String[] args) {
-
+ // NOTE(review): JUnit 4 will not run this -- @Test methods must be public,
+ // non-static, void and no-arg; this doubles as a main() entry point.
+ // Consider a separate test method or dropping the annotation.
+ @Test
+ public static void main(String[] args) {
- String sentence = "I went fishing for some sea bass.";
- TokenizerModel TokenizerModel;
-
- try {
- TokenizerModel = new TokenizerModel(new FileInputStream("src\\test\\resources\\opennlp\\tools\\disambiguator\\en-token.bin"));
- Tokenizer tokenizer = new TokenizerME(TokenizerModel);
-
- String[] words = tokenizer.tokenize(sentence);
-
- POSModel posTaggerModel = new POSModelLoader().load(new File("src\\test\\resources\\opennlp\\tools\\disambiguator\\en-pos-maxent.bin"));
- POSTagger tagger = new POSTaggerME(posTaggerModel);
-
-
- Constants.print("\ntokens :");
- Constants.print(words);
- Constants.print(tagger.tag(words));
-
- Constants.print("\ntesting default lesk :");
- Lesk lesk = new Lesk();
- Constants.print(lesk.disambiguate(words, 6));
-
- Constants.print("\ntesting with null params :");
- lesk.setParams(null);
- Constants.print(lesk.disambiguate(words, 6));
-
- Constants.print("\ntesting with default params");
- lesk.setParams(new LeskParameters());
- Constants.print(lesk.disambiguate(words, 6));
-
- Constants.print("\ntesting with custom params :");
- LeskParameters leskParams = new LeskParameters();
- leskParams.leskType = LeskParameters.LESK_TYPE.LESK_BASIC_CTXT_WIN_BF;
- leskParams.win_b_size = 4;
- leskParams.depth = 3;
- lesk.setParams(leskParams);
- Constants.print(lesk.disambiguate(words, 6));
-
- /*
- Constants.print("\ntesting with wrong params should throw exception :");
- LeskParameters leskWrongParams = new LeskParameters();
- leskWrongParams.depth = -1;
- lesk.setParams(leskWrongParams);
- Constants.print(lesk.disambiguate(words, 6));
- */
-
- } catch (IOException e) {
- e.printStackTrace();
- }
+ String sentence = "I went fishing for some sea bass.";
+ TokenizerModel TokenizerModel;
+ try {
+ // NOTE(review): Windows-only backslash relative paths; these fail on
+ // Unix CI -- consider classpath resources instead.
+ TokenizerModel = new TokenizerModel(new FileInputStream(
+ "src\\test\\resources\\opennlp\\tools\\disambiguator\\en-token.bin"));
+ Tokenizer tokenizer = new TokenizerME(TokenizerModel);
+
+ String[] words = tokenizer.tokenize(sentence);
+
+ POSModel posTaggerModel = new POSModelLoader()
+ .load(new File(
+ "src\\test\\resources\\opennlp\\tools\\disambiguator\\en-pos-maxent.bin"));
+ POSTagger tagger = new POSTaggerME(posTaggerModel);
+
+ Constants.print("\ntokens :");
+ Constants.print(words);
+ Constants.print(tagger.tag(words));
+
+ // Smoke-tests the Lesk disambiguator under several parameter setups;
+ // results are only printed, nothing is asserted.
+ Constants.print("\ntesting default lesk :");
+ Lesk lesk = new Lesk();
+ Constants.print(lesk.disambiguate(words, 6));
+
+ Constants.print("\ntesting with null params :");
+ lesk.setParams(null);
+ Constants.print(lesk.disambiguate(words, 6));
+
+ Constants.print("\ntesting with default params");
+ lesk.setParams(new LeskParameters());
+ Constants.print(lesk.disambiguate(words, 6));
+
+ Constants.print("\ntesting with custom params :");
+ LeskParameters leskParams = new LeskParameters();
+ leskParams.leskType = LeskParameters.LESK_TYPE.LESK_BASIC_CTXT_WIN_BF;
+ leskParams.win_b_size = 4;
+ leskParams.depth = 3;
+ lesk.setParams(leskParams);
+ Constants.print(lesk.disambiguate(words, 6));
+
+ /*
+ * Constants.print("\ntesting with wrong params should throw exception :");
+ * LeskParameters leskWrongParams = new LeskParameters();
+ * leskWrongParams.depth = -1; lesk.setParams(leskWrongParams);
+ * Constants.print(lesk.disambiguate(words, 6));
+ */
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
- }
+ }
}