You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2015/07/03 13:17:26 UTC
svn commit: r1689001 - in /opennlp/sandbox/opennlp-wsd/src:
main/java/opennlp/tools/disambiguator/
main/java/opennlp/tools/disambiguator/ims/
main/java/opennlp/tools/disambiguator/lesk/
test/java/opennlp/tools/disambiguator/
Author: joern
Date: Fri Jul 3 11:17:25 2015
New Revision: 1689001
URL: http://svn.apache.org/r1689001
Log:
No jira, set eol-style property to native.
Modified:
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java (contents, props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java (contents, props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DistributionInstance.java (contents, props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java (contents, props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java (contents, props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java (contents, props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSEventStream.java (contents, props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java (contents, props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java (props changed)
opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java (props changed)
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java (contents, props changed)
opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java (props changed)
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Constants.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java?rev=1689001&r1=1689000&r2=1689001&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java Fri Jul 3 11:17:25 2015
@@ -1,382 +1,382 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.IOException;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-
-import opennlp.tools.disambiguator.DictionaryInstance;
-import opennlp.tools.disambiguator.DistributionInstance;
-import opennlp.tools.disambiguator.ims.WTDIMS;
-
-public class DataExtractor {
-
- public DataExtractor() {
- super();
- }
-
- /**
- * Extract the dictionary from the dictionary XML file and map the senses
- */
- private ArrayList<DictionaryInstance> extractDictionary(String xmlLocation) {
-
- ArrayList<DictionaryInstance> dictionary = new ArrayList<DictionaryInstance>();
-
- // HashMap<Integer, DictionaryInstance> dictionary = new HashMap<Integer,
- // DictionaryInstance>();
-
- try {
-
- File xmlFile = new File(xmlLocation);
- DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
- DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
- Document doc = dBuilder.parse(xmlFile);
- doc.getDocumentElement().normalize();
-
- NodeList nLexelts = doc.getElementsByTagName("lexelt");
-
- int index = 0;
-
- for (int i = 0; i < nLexelts.getLength(); i++) {
-
- Node nLexelt = nLexelts.item(i);
-
- Element eLexelt = (Element) nLexelt;
-
- String word = eLexelt.getAttribute("item");
-
- if (nLexelt.getNodeType() == Node.ELEMENT_NODE) {
-
- NodeList nSenses = eLexelt.getChildNodes();
-
- for (int j = 0; j < nSenses.getLength(); j++) {
-
- if (nSenses.item(j).getNodeType() == Node.ELEMENT_NODE) {
-
- Element eSense = (Element) nSenses.item(j);
-
- int ind = index; // rather use this than the ID used by default
- String id = eSense.getAttribute("id");
- String source = eSense.getAttribute("source");
- String[] synset = eSense.getAttribute("synset").split("\\s");
- String gloss = eSense.getAttribute("gloss");
-
- DictionaryInstance wd = new DictionaryInstance(ind, word, id,
- source, synset, gloss);
-
- dictionary.add(wd);
- index++;
- }
- }
-
- }
- }
- } catch (Exception e) {
- e.printStackTrace();
- }
-
- return dictionary;
-
- }
-
- private HashMap<Integer, ArrayList<String>> getEquivalentSense(
- String sensemapFile) {
-
- HashMap<Integer, ArrayList<String>> mappedSenses = new HashMap<Integer, ArrayList<String>>();
-
- try (BufferedReader wordsList = new BufferedReader(new FileReader(
- sensemapFile))) {
-
- int index = 0;
-
- String line;
-
- // Read the file
- while ((line = wordsList.readLine()) != null) {
-
- String[] temp = line.split("\\s");
-
- ArrayList<String> tempSenses = new ArrayList<String>();
-
- for (String sense : temp) {
- if (sense.length() > 1) {
- // System.out.println(sense);
- tempSenses.add(sense);
- }
- }
-
- mappedSenses.put(index, tempSenses);
- // System.out.println(index);
- index++;
-
- }
-
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- return mappedSenses;
-
- }
-
- private HashMap<String, ArrayList<DictionaryInstance>> extractOptimalDictionary(
- String xmlLocation, String sensemapFile) {
-
- HashMap<String, ArrayList<DictionaryInstance>> optimizedDictionary = new HashMap<String, ArrayList<DictionaryInstance>>();
-
- HashMap<Integer, ArrayList<String>> equivalentSenses = getEquivalentSense(sensemapFile);
-
- ArrayList<DictionaryInstance> dictionary = extractDictionary(xmlLocation);
-
- for (int mapKey : equivalentSenses.keySet()) {
- ArrayList<String> sensesIds = equivalentSenses.get(mapKey);
- ArrayList<DictionaryInstance> optimizedDictionaryInstance = new ArrayList<DictionaryInstance>();
-
- String word = "";
-
- for (String senseId : sensesIds) {
- for (int i = 0; i < dictionary.size(); i++) {
- if (dictionary.get(i).getId().equals(senseId)) {
- optimizedDictionaryInstance.add(dictionary.get(i));
- word = dictionary.get(i).getWord();
- word = word + "_" + mapKey;
- break;
- }
- }
-
- }
-
- optimizedDictionary.put(word, optimizedDictionaryInstance);
- }
-
- return optimizedDictionary;
- }
-
- public HashMap<String, ArrayList<DictionaryInstance>> extractWordSenses(
- String xmlLocation, String sensemapFile, String wordTag) {
-
- /**
- * word tag has to be in the format "word.t" (e.g., "activate.v", "smart.a",
- * etc.)
- */
-
- HashMap<String, ArrayList<DictionaryInstance>> wordSenses = new HashMap<String, ArrayList<DictionaryInstance>>();
-
- HashMap<String, ArrayList<DictionaryInstance>> optimalDictionary = extractOptimalDictionary(
- xmlLocation, sensemapFile);
-
- int i = 0;
- for (String key : optimalDictionary.keySet()) {
- if (key.startsWith(wordTag)) {
- String newKey = wordTag + "_" + i;
- wordSenses.put(newKey, optimalDictionary.get(key));
- i++;
- }
- }
-
- return wordSenses;
- }
-
- public HashMap<String, String> getDictionaryInstance(String xmlLocation,
- String sensemapFile, String wordTag) {
-
- HashMap<String, ArrayList<DictionaryInstance>> dict = extractWordSenses(
- xmlLocation, sensemapFile, wordTag);
-
- HashMap<String, String> senses = new HashMap<String, String>();
-
- for (String key : dict.keySet()) {
- String sense = dict.get(key).get(0).getGloss();
- senses.put(key, sense);
- }
-
- return senses;
-
- }
-
- /**
- * Extract the Dictionary Map [USELESS UNLESS USED FOR STATISTICS LATER !!!]
- */
-
- public HashMap<Integer, DistributionInstance> extractWords(String listOfWords) {
-
- HashMap<Integer, DistributionInstance> instances = new HashMap<Integer, DistributionInstance>();
-
- try (BufferedReader wordsList = new BufferedReader(new FileReader(
- listOfWords))) {
-
- String line;
-
- int index = 0;
-
- // Read the file
- while ((line = wordsList.readLine()) != null) {
-
- String[] temp = line.split("\\t");
-
- String[] wordPos = temp[0].split("\\.");
-
- String tag;
-
- if (wordPos[1].equals("n")) {
- tag = "noun";
- } else if (wordPos[1].equals("v")) {
- tag = "verb";
- } else if (wordPos[1].equals("a")) {
- tag = "adjective";
- } else {
- tag = "adverb";
- }
-
- DistributionInstance word = new DistributionInstance(wordPos[0], tag,
- Integer.parseInt(temp[1]), Integer.parseInt(temp[2]));
-
- instances.put(index, word);
-
- index++;
-
- }
-
- } catch (IOException e) {
- e.printStackTrace();
- }
-
- return instances;
- }
-
- /**
- * Extract the training instances from the training/test set File
- */
-
- public ArrayList<WTDIMS> extractWSDInstances(String xmlDataSet) {
-
- ArrayList<WTDIMS> setInstances = new ArrayList<WTDIMS>();
-
- try {
-
- File xmlFile = new File(xmlDataSet);
- DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
- DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
- Document doc = dBuilder.parse(xmlFile);
-
- doc.getDocumentElement().normalize();
-
- NodeList lexelts = doc.getElementsByTagName("lexelt");
-
- for (int i = 0; i < lexelts.getLength(); i++) {
-
- Node nLexelt = lexelts.item(i);
-
- if (nLexelt.getNodeType() == Node.ELEMENT_NODE) {
- Element eLexelt = (Element) nLexelt;
-
- NodeList nInstances = nLexelt.getChildNodes();
-
- for (int j = 1; j < nInstances.getLength(); j++) {
-
- Node nInstance = nInstances.item(j);
-
- if (nInstance.getNodeType() == Node.ELEMENT_NODE) {
-
- Element eInstance = (Element) nInstance;
-
- String[] wordPos = eLexelt.getAttribute("item").split("\\.");
- String word = wordPos[0]; // Word
- String tag; // Part of Speech
-
- if (wordPos[1].equals("n")) {
- tag = "noun";
- } else if (wordPos[1].equals("v")) {
- tag = "verb";
- } else if (wordPos[1].equals("a")) {
- tag = "adjective";
- } else {
- tag = "adverb";
- }
-
- String id = eInstance.getAttribute("id");
- String source = eInstance.getAttribute("docsrc");
-
- ArrayList<String> answers = new ArrayList<String>();
- String sentence = "";
- String rawWord = "";
-
- NodeList nChildren = nInstance.getChildNodes();
-
- for (int k = 1; k < nChildren.getLength(); k++) {
- Node nChild = nChildren.item(k);
-
- if (nChild.getNodeName().equals("answer")) {
- // String answer =
- // nChild.getAttributes().item(0).getTextContent();
- String senseid = nChild.getAttributes().item(1)
- .getTextContent();
-
- String temp = senseid;
- // String[] temp = { answer, senseid };
- answers.add(temp);
- }
-
- if (nChild.getNodeName().equals("context")) {
- sentence = ((Element) nChild).getTextContent();
-
- if (nChild.hasChildNodes()) {
- // textbefore =
- // nChild.getChildNodes().item(0).getTextContent();
- rawWord = nChild.getChildNodes().item(1).getTextContent();
- // textAfter =
- // nChild.getChildNodes().item(2).getTextContent();
- // System.out.println(rawWord);
- }
- }
-
- }
-
- WTDIMS wordToDisambiguate = new WTDIMS(word, answers, sentence,
- rawWord);
- setInstances.add(wordToDisambiguate);
- // System.out.print(index + "\t");
- // System.out.println(wordToDisambiguate.toString());
- }
- }
- }
- }
-
- } catch (Exception e) {
- e.printStackTrace();
- }
-
- return setInstances;
-
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+import opennlp.tools.disambiguator.DictionaryInstance;
+import opennlp.tools.disambiguator.DistributionInstance;
+import opennlp.tools.disambiguator.ims.WTDIMS;
+
+public class DataExtractor {
+
+ public DataExtractor() {
+ super();
+ }
+
+ /**
+ * Extract the dictionary from the dictionary XML file and map the senses
+ */
+ private ArrayList<DictionaryInstance> extractDictionary(String xmlLocation) {
+
+ ArrayList<DictionaryInstance> dictionary = new ArrayList<DictionaryInstance>();
+
+ // HashMap<Integer, DictionaryInstance> dictionary = new HashMap<Integer,
+ // DictionaryInstance>();
+
+ try {
+
+ File xmlFile = new File(xmlLocation);
+ DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+ DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
+ Document doc = dBuilder.parse(xmlFile);
+ doc.getDocumentElement().normalize();
+
+ NodeList nLexelts = doc.getElementsByTagName("lexelt");
+
+ int index = 0;
+
+ for (int i = 0; i < nLexelts.getLength(); i++) {
+
+ Node nLexelt = nLexelts.item(i);
+
+ Element eLexelt = (Element) nLexelt;
+
+ String word = eLexelt.getAttribute("item");
+
+ if (nLexelt.getNodeType() == Node.ELEMENT_NODE) {
+
+ NodeList nSenses = eLexelt.getChildNodes();
+
+ for (int j = 0; j < nSenses.getLength(); j++) {
+
+ if (nSenses.item(j).getNodeType() == Node.ELEMENT_NODE) {
+
+ Element eSense = (Element) nSenses.item(j);
+
+ int ind = index; // rather use this than the ID used by default
+ String id = eSense.getAttribute("id");
+ String source = eSense.getAttribute("source");
+ String[] synset = eSense.getAttribute("synset").split("\\s");
+ String gloss = eSense.getAttribute("gloss");
+
+ DictionaryInstance wd = new DictionaryInstance(ind, word, id,
+ source, synset, gloss);
+
+ dictionary.add(wd);
+ index++;
+ }
+ }
+
+ }
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ return dictionary;
+
+ }
+
+ private HashMap<Integer, ArrayList<String>> getEquivalentSense(
+ String sensemapFile) {
+
+ HashMap<Integer, ArrayList<String>> mappedSenses = new HashMap<Integer, ArrayList<String>>();
+
+ try (BufferedReader wordsList = new BufferedReader(new FileReader(
+ sensemapFile))) {
+
+ int index = 0;
+
+ String line;
+
+ // Read the file
+ while ((line = wordsList.readLine()) != null) {
+
+ String[] temp = line.split("\\s");
+
+ ArrayList<String> tempSenses = new ArrayList<String>();
+
+ for (String sense : temp) {
+ if (sense.length() > 1) {
+ // System.out.println(sense);
+ tempSenses.add(sense);
+ }
+ }
+
+ mappedSenses.put(index, tempSenses);
+ // System.out.println(index);
+ index++;
+
+ }
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ return mappedSenses;
+
+ }
+
+ private HashMap<String, ArrayList<DictionaryInstance>> extractOptimalDictionary(
+ String xmlLocation, String sensemapFile) {
+
+ HashMap<String, ArrayList<DictionaryInstance>> optimizedDictionary = new HashMap<String, ArrayList<DictionaryInstance>>();
+
+ HashMap<Integer, ArrayList<String>> equivalentSenses = getEquivalentSense(sensemapFile);
+
+ ArrayList<DictionaryInstance> dictionary = extractDictionary(xmlLocation);
+
+ for (int mapKey : equivalentSenses.keySet()) {
+ ArrayList<String> sensesIds = equivalentSenses.get(mapKey);
+ ArrayList<DictionaryInstance> optimizedDictionaryInstance = new ArrayList<DictionaryInstance>();
+
+ String word = "";
+
+ for (String senseId : sensesIds) {
+ for (int i = 0; i < dictionary.size(); i++) {
+ if (dictionary.get(i).getId().equals(senseId)) {
+ optimizedDictionaryInstance.add(dictionary.get(i));
+ word = dictionary.get(i).getWord();
+ word = word + "_" + mapKey;
+ break;
+ }
+ }
+
+ }
+
+ optimizedDictionary.put(word, optimizedDictionaryInstance);
+ }
+
+ return optimizedDictionary;
+ }
+
+ public HashMap<String, ArrayList<DictionaryInstance>> extractWordSenses(
+ String xmlLocation, String sensemapFile, String wordTag) {
+
+ /**
+ * word tag has to be in the format "word.t" (e.g., "activate.v", "smart.a",
+ * etc.)
+ */
+
+ HashMap<String, ArrayList<DictionaryInstance>> wordSenses = new HashMap<String, ArrayList<DictionaryInstance>>();
+
+ HashMap<String, ArrayList<DictionaryInstance>> optimalDictionary = extractOptimalDictionary(
+ xmlLocation, sensemapFile);
+
+ int i = 0;
+ for (String key : optimalDictionary.keySet()) {
+ if (key.startsWith(wordTag)) {
+ String newKey = wordTag + "_" + i;
+ wordSenses.put(newKey, optimalDictionary.get(key));
+ i++;
+ }
+ }
+
+ return wordSenses;
+ }
+
+ public HashMap<String, String> getDictionaryInstance(String xmlLocation,
+ String sensemapFile, String wordTag) {
+
+ HashMap<String, ArrayList<DictionaryInstance>> dict = extractWordSenses(
+ xmlLocation, sensemapFile, wordTag);
+
+ HashMap<String, String> senses = new HashMap<String, String>();
+
+ for (String key : dict.keySet()) {
+ String sense = dict.get(key).get(0).getGloss();
+ senses.put(key, sense);
+ }
+
+ return senses;
+
+ }
+
+ /**
+ * Extract the Dictionary Map [USELESS UNLESS USED FOR STATISTICS LATER !!!]
+ */
+
+ public HashMap<Integer, DistributionInstance> extractWords(String listOfWords) {
+
+ HashMap<Integer, DistributionInstance> instances = new HashMap<Integer, DistributionInstance>();
+
+ try (BufferedReader wordsList = new BufferedReader(new FileReader(
+ listOfWords))) {
+
+ String line;
+
+ int index = 0;
+
+ // Read the file
+ while ((line = wordsList.readLine()) != null) {
+
+ String[] temp = line.split("\\t");
+
+ String[] wordPos = temp[0].split("\\.");
+
+ String tag;
+
+ if (wordPos[1].equals("n")) {
+ tag = "noun";
+ } else if (wordPos[1].equals("v")) {
+ tag = "verb";
+ } else if (wordPos[1].equals("a")) {
+ tag = "adjective";
+ } else {
+ tag = "adverb";
+ }
+
+ DistributionInstance word = new DistributionInstance(wordPos[0], tag,
+ Integer.parseInt(temp[1]), Integer.parseInt(temp[2]));
+
+ instances.put(index, word);
+
+ index++;
+
+ }
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ return instances;
+ }
+
+ /**
+ * Extract the training instances from the training/test set File
+ */
+
+ public ArrayList<WTDIMS> extractWSDInstances(String xmlDataSet) {
+
+ ArrayList<WTDIMS> setInstances = new ArrayList<WTDIMS>();
+
+ try {
+
+ File xmlFile = new File(xmlDataSet);
+ DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+ DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
+ Document doc = dBuilder.parse(xmlFile);
+
+ doc.getDocumentElement().normalize();
+
+ NodeList lexelts = doc.getElementsByTagName("lexelt");
+
+ for (int i = 0; i < lexelts.getLength(); i++) {
+
+ Node nLexelt = lexelts.item(i);
+
+ if (nLexelt.getNodeType() == Node.ELEMENT_NODE) {
+ Element eLexelt = (Element) nLexelt;
+
+ NodeList nInstances = nLexelt.getChildNodes();
+
+ for (int j = 1; j < nInstances.getLength(); j++) {
+
+ Node nInstance = nInstances.item(j);
+
+ if (nInstance.getNodeType() == Node.ELEMENT_NODE) {
+
+ Element eInstance = (Element) nInstance;
+
+ String[] wordPos = eLexelt.getAttribute("item").split("\\.");
+ String word = wordPos[0]; // Word
+ String tag; // Part of Speech
+
+ if (wordPos[1].equals("n")) {
+ tag = "noun";
+ } else if (wordPos[1].equals("v")) {
+ tag = "verb";
+ } else if (wordPos[1].equals("a")) {
+ tag = "adjective";
+ } else {
+ tag = "adverb";
+ }
+
+ String id = eInstance.getAttribute("id");
+ String source = eInstance.getAttribute("docsrc");
+
+ ArrayList<String> answers = new ArrayList<String>();
+ String sentence = "";
+ String rawWord = "";
+
+ NodeList nChildren = nInstance.getChildNodes();
+
+ for (int k = 1; k < nChildren.getLength(); k++) {
+ Node nChild = nChildren.item(k);
+
+ if (nChild.getNodeName().equals("answer")) {
+ // String answer =
+ // nChild.getAttributes().item(0).getTextContent();
+ String senseid = nChild.getAttributes().item(1)
+ .getTextContent();
+
+ String temp = senseid;
+ // String[] temp = { answer, senseid };
+ answers.add(temp);
+ }
+
+ if (nChild.getNodeName().equals("context")) {
+ sentence = ((Element) nChild).getTextContent();
+
+ if (nChild.hasChildNodes()) {
+ // textbefore =
+ // nChild.getChildNodes().item(0).getTextContent();
+ rawWord = nChild.getChildNodes().item(1).getTextContent();
+ // textAfter =
+ // nChild.getChildNodes().item(2).getTextContent();
+ // System.out.println(rawWord);
+ }
+ }
+
+ }
+
+ WTDIMS wordToDisambiguate = new WTDIMS(word, answers, sentence,
+ rawWord);
+ setInstances.add(wordToDisambiguate);
+ // System.out.print(index + "\t");
+ // System.out.println(wordToDisambiguate.toString());
+ }
+ }
+ }
+ }
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ return setInstances;
+
+ }
+}
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DataExtractor.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java?rev=1689001&r1=1689000&r2=1689001&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java Fri Jul 3 11:17:25 2015
@@ -1,95 +1,95 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-public class DictionaryInstance {
-
- protected int index;
-
- protected String word;
-
- protected String id;
- protected String source;
- protected String[] synset;
- protected String gloss;
-
- public DictionaryInstance(int index, String word, String id, String source,
- String[] synset, String gloss) {
- super();
- this.index = index;
- this.word = word;
- this.id = id;
- this.source = source;
- this.synset = synset;
- this.gloss = gloss;
- }
-
- /**
- * Getters and Setters
- */
-
- public int getIndex() {
- return index;
- }
-
- public void setIndex(int index) {
- this.index = index;
- }
-
- public String getWord() {
- return word;
- }
-
- public void setWord(String word) {
- this.word = word;
- }
-
- public String getId() {
- return id;
- }
-
- public void setId(String id) {
- this.id = id;
- }
-
- public String getSource() {
- return source;
- }
-
- public void setSource(String source) {
- this.source = source;
- }
-
- public String[] getSynset() {
- return synset;
- }
-
- public void setSynset(String[] synset) {
- this.synset = synset;
- }
-
- public String getGloss() {
- return gloss;
- }
-
- public void setGloss(String gloss) {
- this.gloss = gloss;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+public class DictionaryInstance {
+
+ protected int index;
+
+ protected String word;
+
+ protected String id;
+ protected String source;
+ protected String[] synset;
+ protected String gloss;
+
+ public DictionaryInstance(int index, String word, String id, String source,
+ String[] synset, String gloss) {
+ super();
+ this.index = index;
+ this.word = word;
+ this.id = id;
+ this.source = source;
+ this.synset = synset;
+ this.gloss = gloss;
+ }
+
+ /**
+ * Getters and Setters
+ */
+
+ public int getIndex() {
+ return index;
+ }
+
+ public void setIndex(int index) {
+ this.index = index;
+ }
+
+ public String getWord() {
+ return word;
+ }
+
+ public void setWord(String word) {
+ this.word = word;
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ public void setId(String id) {
+ this.id = id;
+ }
+
+ public String getSource() {
+ return source;
+ }
+
+ public void setSource(String source) {
+ this.source = source;
+ }
+
+ public String[] getSynset() {
+ return synset;
+ }
+
+ public void setSynset(String[] synset) {
+ this.synset = synset;
+ }
+
+ public String getGloss() {
+ return gloss;
+ }
+
+ public void setGloss(String gloss) {
+ this.gloss = gloss;
+ }
+}
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DictionaryInstance.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DistributionInstance.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DistributionInstance.java?rev=1689001&r1=1689000&r2=1689001&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DistributionInstance.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DistributionInstance.java Fri Jul 3 11:17:25 2015
@@ -1,69 +1,69 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-public class DistributionInstance {
-
- protected String word;
- protected String pos;
- protected int trainingSetInstances;
- protected int testSetInstances;
-
- public DistributionInstance(String word, String pos,
- int trainingSetInstances, int testSetInstances) {
- super();
- this.word = word;
- this.pos = pos;
- this.trainingSetInstances = trainingSetInstances;
- this.testSetInstances = testSetInstances;
- }
-
- public String getWord() {
- return word;
- }
-
- public void setWord(String word) {
- this.word = word;
- }
-
- public String getPos() {
- return pos;
- }
-
- public void setPos(String pos) {
- this.pos = pos;
- }
-
- public int getTrainingSetInstances() {
- return trainingSetInstances;
- }
-
- public void setTrainingSetInstances(int trainingSetInstances) {
- this.trainingSetInstances = trainingSetInstances;
- }
-
- public int getTestSetInstances() {
- return testSetInstances;
- }
-
- public void setTestSetInstances(int testSetInstances) {
- this.testSetInstances = testSetInstances;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+public class DistributionInstance {
+
+ protected String word;
+ protected String pos;
+ protected int trainingSetInstances;
+ protected int testSetInstances;
+
+ public DistributionInstance(String word, String pos,
+ int trainingSetInstances, int testSetInstances) {
+ super();
+ this.word = word;
+ this.pos = pos;
+ this.trainingSetInstances = trainingSetInstances;
+ this.testSetInstances = testSetInstances;
+ }
+
+ public String getWord() {
+ return word;
+ }
+
+ public void setWord(String word) {
+ this.word = word;
+ }
+
+ public String getPos() {
+ return pos;
+ }
+
+ public void setPos(String pos) {
+ this.pos = pos;
+ }
+
+ public int getTrainingSetInstances() {
+ return trainingSetInstances;
+ }
+
+ public void setTrainingSetInstances(int trainingSetInstances) {
+ this.trainingSetInstances = trainingSetInstances;
+ }
+
+ public int getTestSetInstances() {
+ return testSetInstances;
+ }
+
+ public void setTestSetInstances(int testSetInstances) {
+ this.testSetInstances = testSetInstances;
+ }
+}
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/DistributionInstance.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java?rev=1689001&r1=1689000&r2=1689001&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java Fri Jul 3 11:17:25 2015
@@ -1,185 +1,185 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-
-import java.util.ArrayList;
-
-import opennlp.tools.disambiguator.ims.WTDIMS;
-
-public class FeaturesExtractor {
-
- public FeaturesExtractor() {
- super();
- }
-
- /**
- * @Algorithm: IMS (It Makes Sense)
- *
- * The following methods serve to extract the features for the
- * algorithm IMS.
- *
- * Three families of features are to be extracted: - PoS of
- * Surrounding Words: it requires one parameter: "Window size" -
- * Surrounding Words: no parameters are required - Local
- * Collocations: it requires one parameter: "the n-gram"
- *
- */
- private String[] extractPosOfSurroundingWords(String[] sentence,
- int wordIndex, int windowSize) {
-
- String[] taggedSentence = Loader.getTagger().tag(sentence);
-
- String[] tags = new String[2 * windowSize + 1];
-
- int j = 0;
-
- for (int i = wordIndex - windowSize; i < wordIndex + windowSize; i++) {
- if (i < 0 || i >= sentence.length) {
- tags[j] = "null";
- } else {
- tags[j] = taggedSentence[i].toLowerCase();
- }
- j++;
- }
-
- return tags;
- }
-
- private String[] extractSurroundingWords(String[] sentence, int wordIndex) {
-
- String[] posTags = Loader.getTagger().tag(sentence);
-
- ArrayList<String> contextWords = new ArrayList<String>();
-
- for (int i = 0; i < sentence.length; i++) {
-
- if (!Constants.stopWords.contains(sentence[i].toLowerCase())
- && (wordIndex != i)) {
-
- String word = sentence[i].toLowerCase().replaceAll("[^a-z]", "").trim();
-
- if (!word.equals("")) {
- String lemma = Loader.getLemmatizer().lemmatize(sentence[i],
- posTags[i]);
- contextWords.add(lemma);
- }
-
- }
- }
-
- return contextWords.toArray(new String[contextWords.size()]);
- }
-
- private String[] extractLocalCollocations(String[] sentence, int wordIndex,
- int ngram) {
- /**
- * Here the author used only 11 features of this type. the range was set to
- * 3 (bigrams extracted in a way that they are at max separated by 1 word).
- */
-
- ArrayList<String> localCollocations = new ArrayList<String>();
-
- for (int i = wordIndex - ngram; i <= wordIndex + ngram; i++) {
-
- if (!(i < 0 || i > sentence.length - 3)) {
- if ((i != wordIndex) && (i + 1 != wordIndex)
- && (i + 1 < wordIndex + ngram)) {
- String lc = (sentence[i] + " " + sentence[i + 1]).toLowerCase();
- localCollocations.add(lc);
- }
- if ((i != wordIndex) && (i + 2 != wordIndex)
- && (i + 2 < wordIndex + ngram)) {
- String lc = (sentence[i] + " " + sentence[i + 2]).toLowerCase();
- localCollocations.add(lc);
- }
- }
-
- }
-
- String[] res = new String[localCollocations.size()];
- res = localCollocations.toArray(res);
-
- return res;
- }
-
- // public method
- /**
- * This method generates the different set of features related to the IMS
- * approach and store them in the corresponding attributes of the WTDIMS
- *
- * @param word
- * the word to disambiguate [object: WTDIMS]
- * @param windowSize
- * the parameter required to generate the features qualified of
- * "PoS of Surrounding Words"
- * @param ngram
- * the parameter required to generate the features qualified of
- * "Local Collocations"
- */
- public void extractIMSFeatures(WTDIMS word, int windowSize, int ngram) {
-
- word.setPosOfSurroundingWords(extractPosOfSurroundingWords(
- word.getSentence(), word.getWordIndex(), windowSize));
- word.setSurroundingWords(extractSurroundingWords(word.getSentence(),
- word.getWordIndex()));
- word.setLocalCollocations(extractLocalCollocations(word.getSentence(),
- word.getWordIndex(), ngram));
-
- }
-
- /**
- * This generates the context of IMS. It supposes that the features have
- * already been extracted and stored in the WTDIMS object, therefore it
- * doesn't require any parameters.
- *
- * @param word
- * @return the Context of the wordToDisambiguate
- */
- public String[] serializeIMSFeatures(WTDIMS word) {
-
- String[] posOfSurroundingWords = word.getPosOfSurroundingWords();
- String[] surroundingWords = word.getSurroundingWords();
- String[] localCollocations = word.getLocalCollocations();
-
- String[] serializedFeatures = new String[posOfSurroundingWords.length
- + surroundingWords.length + localCollocations.length];
-
- int i = 0;
-
- for (String feature : posOfSurroundingWords) {
- serializedFeatures[i] = "F" + i + "=" + feature;
- i++;
- }
-
- for (String feature : surroundingWords) {
- serializedFeatures[i] = "F" + i + "=" + feature;
- i++;
- }
-
- for (String feature : localCollocations) {
- serializedFeatures[i] = "F" + i + "=" + feature;
- i++;
- }
-
- return serializedFeatures;
-
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+
+import java.util.ArrayList;
+
+import opennlp.tools.disambiguator.ims.WTDIMS;
+
+/**
+ * Extracts and serializes the features used by the IMS (It Makes Sense)
+ * algorithm.
+ *
+ * Three families of features are handled:
+ * <ul>
+ * <li>PoS of Surrounding Words: requires one parameter, the window size</li>
+ * <li>Surrounding Words: requires no parameter</li>
+ * <li>Local Collocations: requires one parameter, the n-gram</li>
+ * </ul>
+ */
+public class FeaturesExtractor {
+
+  public FeaturesExtractor() {
+    super();
+  }
+
+  /**
+   * Collects the lower-cased PoS tags of the positions in a window centred on
+   * the target word; the target's own tag is part of the window.
+   *
+   * @param sentence
+   *          the tokens of the sentence
+   * @param wordIndex
+   *          the index of the target word in {@code sentence}
+   * @param windowSize
+   *          the number of positions taken on each side of the target
+   * @return an array of {@code 2 * windowSize + 1} entries; positions that
+   *         fall outside the sentence hold the string {@code "null"}
+   */
+  private String[] extractPosOfSurroundingWords(String[] sentence,
+      int wordIndex, int windowSize) {
+
+    String[] taggedSentence = Loader.getTagger().tag(sentence);
+
+    String[] tags = new String[2 * windowSize + 1];
+
+    int j = 0;
+
+    // The upper bound is inclusive: the array holds 2 * windowSize + 1 slots,
+    // so an exclusive bound would leave the last slot unset and drop the tag
+    // of the word at wordIndex + windowSize.
+    for (int i = wordIndex - windowSize; i <= wordIndex + windowSize; i++) {
+      if (i < 0 || i >= sentence.length) {
+        tags[j] = "null";
+      } else {
+        tags[j] = taggedSentence[i].toLowerCase();
+      }
+      j++;
+    }
+
+    return tags;
+  }
+
+  /**
+   * Collects the lemmas of the context words of the sentence. A token is
+   * kept when it is not the target word, its lower-cased form is not in
+   * {@code Constants.stopWords}, and it still contains at least one letter
+   * {@code a-z} once lower-cased.
+   *
+   * @param sentence
+   *          the tokens of the sentence
+   * @param wordIndex
+   *          the index of the target word in {@code sentence}
+   * @return the lemmas of the retained tokens, in sentence order
+   */
+  private String[] extractSurroundingWords(String[] sentence, int wordIndex) {
+
+    String[] posTags = Loader.getTagger().tag(sentence);
+
+    ArrayList<String> contextWords = new ArrayList<String>();
+
+    for (int i = 0; i < sentence.length; i++) {
+
+      if (!Constants.stopWords.contains(sentence[i].toLowerCase())
+          && (wordIndex != i)) {
+
+        // Only used to reject tokens without any letter; the lemma below is
+        // computed from the original token.
+        String word = sentence[i].toLowerCase().replaceAll("[^a-z]", "").trim();
+
+        if (!word.equals("")) {
+          String lemma = Loader.getLemmatizer().lemmatize(sentence[i],
+              posTags[i]);
+          contextWords.add(lemma);
+        }
+
+      }
+    }
+
+    return contextWords.toArray(new String[contextWords.size()]);
+  }
+
+  /**
+   * Builds lower-cased word pairs around the target word. For every start
+   * position {@code i} between {@code wordIndex - ngram} and
+   * {@code wordIndex + ngram} for which {@code i + 2} is still inside the
+   * sentence, the pairs {@code (i, i + 1)} and {@code (i, i + 2)} are added
+   * when neither member is the target word and the second member lies before
+   * {@code wordIndex + ngram}.
+   *
+   * @param sentence
+   *          the tokens of the sentence
+   * @param wordIndex
+   *          the index of the target word in {@code sentence}
+   * @param ngram
+   *          the range around the target word
+   * @return the collocations, each formatted as the two words separated by a
+   *         single space
+   */
+  private String[] extractLocalCollocations(String[] sentence, int wordIndex,
+      int ngram) {
+    // Here the author used only 11 features of this type. The range was set
+    // to 3 (bigrams extracted in a way that they are at max separated by 1
+    // word).
+
+    ArrayList<String> localCollocations = new ArrayList<String>();
+
+    for (int i = wordIndex - ngram; i <= wordIndex + ngram; i++) {
+
+      if (!(i < 0 || i > sentence.length - 3)) {
+        if ((i != wordIndex) && (i + 1 != wordIndex)
+            && (i + 1 < wordIndex + ngram)) {
+          String lc = (sentence[i] + " " + sentence[i + 1]).toLowerCase();
+          localCollocations.add(lc);
+        }
+        if ((i != wordIndex) && (i + 2 != wordIndex)
+            && (i + 2 < wordIndex + ngram)) {
+          String lc = (sentence[i] + " " + sentence[i + 2]).toLowerCase();
+          localCollocations.add(lc);
+        }
+      }
+
+    }
+
+    return localCollocations.toArray(new String[localCollocations.size()]);
+  }
+
+  /**
+   * Generates the different sets of features related to the IMS approach and
+   * stores them in the corresponding attributes of the WTDIMS.
+   *
+   * @param word
+   *          the word to disambiguate [object: WTDIMS]
+   * @param windowSize
+   *          the parameter required to generate the features qualified of
+   *          "PoS of Surrounding Words"
+   * @param ngram
+   *          the parameter required to generate the features qualified of
+   *          "Local Collocations"
+   */
+  public void extractIMSFeatures(WTDIMS word, int windowSize, int ngram) {
+
+    word.setPosOfSurroundingWords(extractPosOfSurroundingWords(
+        word.getSentence(), word.getWordIndex(), windowSize));
+    word.setSurroundingWords(extractSurroundingWords(word.getSentence(),
+        word.getWordIndex()));
+    word.setLocalCollocations(extractLocalCollocations(word.getSentence(),
+        word.getWordIndex(), ngram));
+
+  }
+
+  /**
+   * Generates the context of IMS. It supposes that the features have already
+   * been extracted and stored in the WTDIMS object, therefore it requires no
+   * further parameters.
+   *
+   * @param word
+   *          the word to disambiguate, carrying its extracted features
+   * @return the context of the word to disambiguate: one {@code "F<i>=<value>"}
+   *         entry per feature, numbered consecutively over the PoS tags, the
+   *         surrounding words and the local collocations, in that order
+   */
+  public String[] serializeIMSFeatures(WTDIMS word) {
+
+    String[] posOfSurroundingWords = word.getPosOfSurroundingWords();
+    String[] surroundingWords = word.getSurroundingWords();
+    String[] localCollocations = word.getLocalCollocations();
+
+    String[] serializedFeatures = new String[posOfSurroundingWords.length
+        + surroundingWords.length + localCollocations.length];
+
+    int i = 0;
+
+    for (String feature : posOfSurroundingWords) {
+      serializedFeatures[i] = "F" + i + "=" + feature;
+      i++;
+    }
+
+    for (String feature : surroundingWords) {
+      serializedFeatures[i] = "F" + i + "=" + feature;
+      i++;
+    }
+
+    for (String feature : localCollocations) {
+      serializedFeatures[i] = "F" + i + "=" + feature;
+      i++;
+    }
+
+    return serializedFeatures;
+
+  }
+}
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/FeaturesExtractor.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Loader.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/Node.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/PreProcessor.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDParameters.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WSDisambiguator.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordPOS.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordSense.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/WordToDisambiguate.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java?rev=1689001&r1=1689000&r2=1689001&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java Fri Jul 3 11:17:25 2015
@@ -1,70 +1,70 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator.ims;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-
-import opennlp.tools.disambiguator.FeaturesExtractor;
-import opennlp.tools.disambiguator.ims.WTDIMS;
-
-public class DefaultIMSContextGenerator implements IMSContextGenerator {
-
- FeaturesExtractor fExtractor = new FeaturesExtractor();
-
- /**
- * Default context generator for IMS.
- */
-
- public DefaultIMSContextGenerator() {
- }
-
- /**
- * Get Context of a word To disambiguate
- */
- @Override
- public String[] getContext(WTDIMS word) {
- return fExtractor.serializeIMSFeatures(word);
- }
-
- /**
- * Returns an {@link ArrayList} of features for the object of type WTDIMS
- * Extensions of this class can override this method to create a customized
- * {@link IMSContextGenerator}
- *
- * @param word
- * : the word to disambiguate {@link WTDIMS} along with its sentence
- * [Check the Class WTDIMS]
- * @param numberOfSurroundingWords
- * : the number of surrounding words used in the feature
- * "POS Tags of Surrounding Words" Default value is 3
- * @param ngram
- * : the number of words used to extract the feature
- * "Local Collocations" Default value is 2
- *
- * @return an {@link ArrayList} of features
- */
-
- protected List<String> createContext(WTDIMS word) {
- return Arrays.asList(getContext(word));
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator.ims;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+
+import opennlp.tools.disambiguator.FeaturesExtractor;
+import opennlp.tools.disambiguator.ims.WTDIMS;
+
+/**
+ * Default {@link IMSContextGenerator}: it delegates to a
+ * {@link FeaturesExtractor} to serialize the features of a {@link WTDIMS}.
+ */
+public class DefaultIMSContextGenerator implements IMSContextGenerator {
+
+ // Extractor used to serialize the features stored in the word.
+ FeaturesExtractor fExtractor = new FeaturesExtractor();
+
+ /**
+ * Creates the default context generator for IMS.
+ */
+
+ public DefaultIMSContextGenerator() {
+ }
+
+ /**
+ * Gets the context of a word to disambiguate by serializing its features
+ * with the {@link FeaturesExtractor}.
+ *
+ * @param word
+ * the word to disambiguate
+ * @return the serialized features of the word
+ */
+ @Override
+ public String[] getContext(WTDIMS word) {
+ return fExtractor.serializeIMSFeatures(word);
+ }
+
+ /**
+ * Returns the features of the object of type WTDIMS as a {@link List}.
+ * Extensions of this class can override this method to create a customized
+ * {@link IMSContextGenerator}
+ *
+ * @param word
+ * : the word to disambiguate {@link WTDIMS} along with its sentence
+ * [Check the Class WTDIMS]
+ *
+ * @return a fixed-size {@link List} view (not a {@link ArrayList}) over the
+ * array returned by {@link #getContext(WTDIMS)}
+ */
+
+ protected List<String> createContext(WTDIMS word) {
+ return Arrays.asList(getContext(word));
+ }
+}
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/DefaultIMSContextGenerator.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/FeaturesExtractor.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMS.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java?rev=1689001&r1=1689000&r2=1689001&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java Fri Jul 3 11:17:25 2015
@@ -1,28 +1,28 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator.ims;
-
-/**
- * Interface for {@link IMSME} context generators.
- */
-public interface IMSContextGenerator {
-
- public String[] getContext(WTDIMS word);
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator.ims;
+
+/**
+ * Contract implemented by the context generators used with {@link IMSME}.
+ */
+public interface IMSContextGenerator {
+
+  /**
+   * Produces the context of a word to disambiguate.
+   *
+   * @param word
+   *          the word to disambiguate
+   * @return the context as an array of strings
+   */
+  String[] getContext(WTDIMS word);
+}
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSContextGenerator.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSEventStream.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSEventStream.java?rev=1689001&r1=1689000&r2=1689001&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSEventStream.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSEventStream.java Fri Jul 3 11:17:25 2015
@@ -1,54 +1,54 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator.ims;
-
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import opennlp.tools.ml.model.Event;
-import opennlp.tools.util.AbstractEventStream;
-import opennlp.tools.util.ObjectStream;
-
-public class IMSEventStream extends AbstractEventStream<WTDIMS> {
-
- private IMSContextGenerator cg;
-
- public IMSEventStream(ObjectStream<WTDIMS> samples) {
- super(samples);
- }
-
- @Override
- protected Iterator<Event> createEvents(WTDIMS sample) {
- List<Event> events = new ArrayList<Event>();
-
- int sense = sample.getSense();
-
- String[] context = cg.getContext(sample);
-
- Event ev = new Event(sense + "", context);
-
- events.add(ev);
-
- return events.iterator();
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator.ims;
+
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.util.AbstractEventStream;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Event stream that turns each {@link WTDIMS} sample into a single
+ * {@link Event}: the outcome is the sample's sense and the context is the one
+ * produced by the {@link IMSContextGenerator}.
+ */
+public class IMSEventStream extends AbstractEventStream<WTDIMS> {
+
+  // Assigned eagerly: createEvents dereferences it and nothing else in this
+  // class sets it, so leaving it unassigned ends in a NullPointerException.
+  private final IMSContextGenerator cg = new DefaultIMSContextGenerator();
+
+  /**
+   * Creates an event stream over the given samples.
+   *
+   * @param samples
+   *          the stream of words to disambiguate
+   */
+  public IMSEventStream(ObjectStream<WTDIMS> samples) {
+    super(samples);
+  }
+
+  /**
+   * Creates the event of one sample.
+   *
+   * @param sample
+   *          the word to disambiguate; its context is obtained from the
+   *          context generator
+   * @return an iterator over exactly one event, whose outcome is the decimal
+   *         string of {@code sample.getSense()}
+   */
+  @Override
+  protected Iterator<Event> createEvents(WTDIMS sample) {
+    List<Event> events = new ArrayList<Event>();
+
+    int sense = sample.getSense();
+
+    String[] context = cg.getContext(sample);
+
+    events.add(new Event(sense + "", context));
+
+    return events.iterator();
+  }
+
+}
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSEventStream.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java?rev=1689001&r1=1689000&r2=1689001&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java Fri Jul 3 11:17:25 2015
@@ -1,101 +1,101 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator.ims;
-
-import opennlp.tools.util.BaseToolFactory;
-import opennlp.tools.util.InvalidFormatException;
-
-public class IMSFactory extends BaseToolFactory {
-
- protected String languageCode;
-
- protected String resourcesFolder = "src\\test\\resources\\supervised\\";
-
- protected String rawDataDirectory = resourcesFolder + "training\\";
- protected String trainingDataDirectory = resourcesFolder + "models\\";
- protected String dictionaryDirectory = resourcesFolder + "dictionary\\";
-
- protected String dict = dictionaryDirectory + "EnglishLS.dictionary.xml";
- protected String map = dictionaryDirectory + "EnglishLS.sensemap";
-
- public IMSFactory() {
- super();
- }
-
- public String getLanguageCode() {
- return languageCode;
- }
-
- public void setLanguageCode(String languageCode) {
- this.languageCode = languageCode;
- }
-
- public String getRawDataDirectory() {
- return rawDataDirectory;
- }
-
- public void setRawDataDirectory(String rawDataDirectory) {
- this.rawDataDirectory = rawDataDirectory;
- }
-
- public String getTrainingDataDirectory() {
- return trainingDataDirectory;
- }
-
- public void setTrainingDataDirectory(String trainingDataDirectory) {
- this.trainingDataDirectory = trainingDataDirectory;
- }
-
- public String getDictionaryDirectory() {
- return dictionaryDirectory;
- }
-
- public void setDictionaryDirectory(String dictionaryDirectory) {
- this.dictionaryDirectory = dictionaryDirectory;
- }
-
- public String getDict() {
- return dict;
- }
-
- public void setDict(String dict) {
- this.dict = dict;
- }
-
- public String getMap() {
- return map;
- }
-
- public void setMap(String map) {
- this.map = map;
- }
-
- void init() {
- }
-
- public IMSContextGenerator createContextGenerator() {
-
- return new DefaultIMSContextGenerator();
- }
-
- @Override
- public void validateArtifactMap() throws InvalidFormatException {
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator.ims;
+
+import opennlp.tools.util.BaseToolFactory;
+import opennlp.tools.util.InvalidFormatException;
+
+/**
+ * Tool factory for IMS. It stores the language code and the paths of the
+ * resources (raw data, training data, dictionary and sense map) and creates
+ * the {@link IMSContextGenerator} to use.
+ */
+public class IMSFactory extends BaseToolFactory {
+
+  // Platform separator: hard-coded backslashes only resolve on Windows.
+  private static final String SEP = java.io.File.separator;
+
+  protected String languageCode;
+
+  protected String resourcesFolder = "src" + SEP + "test" + SEP + "resources"
+      + SEP + "supervised" + SEP;
+
+  protected String rawDataDirectory = resourcesFolder + "training" + SEP;
+  protected String trainingDataDirectory = resourcesFolder + "models" + SEP;
+  protected String dictionaryDirectory = resourcesFolder + "dictionary" + SEP;
+
+  // Derived once from the default dictionary directory; the setter of the
+  // directory does not recompute them.
+  protected String dict = dictionaryDirectory + "EnglishLS.dictionary.xml";
+  protected String map = dictionaryDirectory + "EnglishLS.sensemap";
+
+  /**
+   * Creates a factory with the default resource paths.
+   */
+  public IMSFactory() {
+    super();
+  }
+
+  /** @return the language code, {@code null} until it has been set */
+  public String getLanguageCode() {
+    return languageCode;
+  }
+
+  /** @param languageCode the language code to store */
+  public void setLanguageCode(String languageCode) {
+    this.languageCode = languageCode;
+  }
+
+  /** @return the path of the raw data directory */
+  public String getRawDataDirectory() {
+    return rawDataDirectory;
+  }
+
+  /** @param rawDataDirectory the path of the raw data directory */
+  public void setRawDataDirectory(String rawDataDirectory) {
+    this.rawDataDirectory = rawDataDirectory;
+  }
+
+  /** @return the path of the training data directory */
+  public String getTrainingDataDirectory() {
+    return trainingDataDirectory;
+  }
+
+  /** @param trainingDataDirectory the path of the training data directory */
+  public void setTrainingDataDirectory(String trainingDataDirectory) {
+    this.trainingDataDirectory = trainingDataDirectory;
+  }
+
+  /** @return the path of the dictionary directory */
+  public String getDictionaryDirectory() {
+    return dictionaryDirectory;
+  }
+
+  /**
+   * Stores the path of the dictionary directory. The values returned by
+   * {@link #getDict()} and {@link #getMap()} are left unchanged.
+   *
+   * @param dictionaryDirectory
+   *          the path of the dictionary directory
+   */
+  public void setDictionaryDirectory(String dictionaryDirectory) {
+    this.dictionaryDirectory = dictionaryDirectory;
+  }
+
+  /** @return the path of the dictionary file */
+  public String getDict() {
+    return dict;
+  }
+
+  /** @param dict the path of the dictionary file */
+  public void setDict(String dict) {
+    this.dict = dict;
+  }
+
+  /** @return the path of the sense map file */
+  public String getMap() {
+    return map;
+  }
+
+  /** @param map the path of the sense map file */
+  public void setMap(String map) {
+    this.map = map;
+  }
+
+  // Currently performs no initialization.
+  void init() {
+  }
+
+  /**
+   * Creates the context generator used by IMS.
+   *
+   * @return a new {@link DefaultIMSContextGenerator}
+   */
+  public IMSContextGenerator createContextGenerator() {
+
+    return new DefaultIMSContextGenerator();
+  }
+
+  /**
+   * Performs no validation.
+   */
+  @Override
+  public void validateArtifactMap() throws InvalidFormatException {
+  }
+}
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/IMSFactory.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/ims/WTDIMS.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/Lesk.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/LeskParameters.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/main/java/opennlp/tools/disambiguator/lesk/WTDLesk.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java?rev=1689001&r1=1689000&r2=1689001&view=diff
==============================================================================
--- opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java (original)
+++ opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java Fri Jul 3 11:17:25 2015
@@ -1,48 +1,48 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package opennlp.tools.disambiguator;
-
-import opennlp.tools.disambiguator.ims.IMS;
-
-public class IMSTester {
-
- public static void main(String[] args) {
-
- IMS ims = new IMS();
-
- String test = "You have to write an essay without using a dictionary!";
- String[] sentence = Loader.getTokenizer().tokenize(test);
- Constants.print(ims.disambiguate(sentence, 3));
-
- String test2 = "Please write to me soon.";
- String[] sentence2 = Loader.getTokenizer().tokenize(test2);
- Constants.print(ims.disambiguate(sentence2, 1));
-
- String test3 = "the argument over foreign aid goes on and on";
- String[] sentence3 = Loader.getTokenizer().tokenize(test3);
- Constants.print(ims.disambiguate(sentence3, 1));
-
- String test4 = "it was a strong argument that his hypothesis was true";
- String[] sentence4 = Loader.getTokenizer().tokenize(test4);
- Constants.print(ims.disambiguate(sentence4, 3));
-
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package opennlp.tools.disambiguator;
+
+import opennlp.tools.disambiguator.ims.IMS;
+
+public class IMSTester {
+
+ public static void main(String[] args) {
+
+ IMS ims = new IMS();
+
+ String test = "You have to write an essay without using a dictionary!";
+ String[] sentence = Loader.getTokenizer().tokenize(test);
+ Constants.print(ims.disambiguate(sentence, 3));
+
+ String test2 = "Please write to me soon.";
+ String[] sentence2 = Loader.getTokenizer().tokenize(test2);
+ Constants.print(ims.disambiguate(sentence2, 1));
+
+ String test3 = "the argument over foreign aid goes on and on";
+ String[] sentence3 = Loader.getTokenizer().tokenize(test3);
+ Constants.print(ims.disambiguate(sentence3, 1));
+
+ String test4 = "it was a strong argument that his hypothesis was true";
+ String[] sentence4 = Loader.getTokenizer().tokenize(test4);
+ Constants.print(ims.disambiguate(sentence4, 3));
+
+ }
+
+}
Propchange: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/IMSTester.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: opennlp/sandbox/opennlp-wsd/src/test/java/opennlp/tools/disambiguator/Tester.java
------------------------------------------------------------------------------
svn:eol-style = native