You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2023/01/19 13:07:19 UTC
[opennlp-sandbox] 01/01: updates sandbox component 'opennlp-coref' to be compatible with latest opennlp-tools release - adjusts opennlp-tools to 2.1.0 - adjusts parent project (org.apache.apache) to version 18 - adjusts Java language level to 11
This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch migrate-opennlp-coref-to-opennlp-tools-2_1_0
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
commit ba7fcf589ec2fd34b738e9b19c57fb374f1f3553
Author: Martin Wiesner <ma...@hs-heilbronn.de>
AuthorDate: Thu Jan 19 14:07:10 2023 +0100
updates sandbox component 'opennlp-coref' to be compatible with latest opennlp-tools release
- adjusts opennlp-tools to 2.1.0
- adjusts parent project (org.apache.apache) to version 18
- adjusts Java language level to 11
---
opennlp-coref/pom.xml | 10 +-
.../tools/cmdline/coref/CoreferencerTool.java | 37 +++--
.../tools/coref/resolver/AbstractResolver.java | 16 +-
.../resolver/DefaultNonReferentialResolver.java | 35 +++--
.../tools/coref/resolver/MaxentResolver.java | 44 ++++--
.../java/opennlp/tools/coref/sim/GenderModel.java | 124 +++++++--------
.../java/opennlp/tools/coref/sim/NumberModel.java | 88 ++++++-----
.../opennlp/tools/coref/sim/SimilarityModel.java | 170 ++++++++++-----------
.../tools/formats/CorefSampleStreamFactory.java | 19 ++-
9 files changed, 280 insertions(+), 263 deletions(-)
diff --git a/opennlp-coref/pom.xml b/opennlp-coref/pom.xml
index 033ffc2..a3d3d14 100644
--- a/opennlp-coref/pom.xml
+++ b/opennlp-coref/pom.xml
@@ -25,12 +25,12 @@
<parent>
<groupId>org.apache</groupId>
<artifactId>apache</artifactId>
- <version>13</version>
+ <version>18</version>
<relativePath />
</parent>
<artifactId>opennlp-coref</artifactId>
- <version>1.6.0-SNAPSHOT</version>
+ <version>2.1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Apache OpenNLP Coreferencer</name>
@@ -38,7 +38,7 @@
<dependency>
<groupId>org.apache.opennlp</groupId>
<artifactId>opennlp-tools</artifactId>
- <version>1.6.0</version>
+ <version>2.1.0</version>
<scope>compile</scope>
</dependency>
@@ -69,8 +69,8 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
- <source>1.8</source>
- <target>1.8</target>
+ <source>11</source>
+ <target>11</target>
<compilerArgument>-Xlint</compilerArgument>
</configuration>
</plugin>
diff --git a/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTool.java b/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTool.java
index 885951c..9ad4276 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTool.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/cmdline/coref/CoreferencerTool.java
@@ -18,7 +18,7 @@
package opennlp.tools.cmdline.coref;
import java.io.IOException;
-import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -30,6 +30,7 @@ import opennlp.tools.cmdline.BasicCmdLineTool;
import opennlp.tools.cmdline.CLI;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.PerformanceMonitor;
+import opennlp.tools.cmdline.SystemInputStreamFactory;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.coref.DiscourseEntity;
import opennlp.tools.coref.LinkerMode;
@@ -47,12 +48,12 @@ public class CoreferencerTool extends BasicCmdLineTool {
class CorefParse {
- private Map<Parse, Integer> parseMap;
- private List<Parse> parses;
+ private final Map<Parse, Integer> parseMap;
+ private final List<Parse> parses;
public CorefParse(List<Parse> parses, DiscourseEntity[] entities) {
this.parses = parses;
- parseMap = new HashMap<Parse, Integer>();
+ parseMap = new HashMap<>();
for (int ei = 0, en = entities.length; ei < en;ei++) {
if (entities[ei].getNumMentions() > 1) {
for (Iterator<MentionContext> mi = entities[ei].getMentions(); mi.hasNext();) {
@@ -65,8 +66,7 @@ public class CoreferencerTool extends BasicCmdLineTool {
}
public void show() {
- for (int pi = 0, pn = parses.size(); pi < pn;pi++) {
- Parse p = parses.get(pi);
+ for (Parse p : parses) {
show(p);
System.out.println();
}
@@ -85,8 +85,7 @@ public class CoreferencerTool extends BasicCmdLineTool {
System.out.print(" ");
}
Parse[] children = p.getChildren();
- for (int pi = 0, pn = children.length; pi < pn;pi++) {
- Parse c = children[pi];
+ for (Parse c : children) {
Span s = c.getSpan();
if (start < s.getStart()) {
System.out.print(p.getText().substring(start, s.getStart()));
@@ -104,7 +103,8 @@ public class CoreferencerTool extends BasicCmdLineTool {
public String getShortDescription() {
return "learnable noun phrase coreferencer";
}
-
+
+ @Override
public void run(String[] args) {
if (args.length != 1) {
System.out.println(getHelp());
@@ -118,17 +118,15 @@ public class CoreferencerTool extends BasicCmdLineTool {
throw new TerminateToolException(-1, "Failed to load all coreferencer models!", e);
}
- ObjectStream<String> lineStream =
- new PlainTextByLineStream(new InputStreamReader(System.in));
-
PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "parses");
perfMon.start();
- try {
+ try (ObjectStream<String> lineStream = new PlainTextByLineStream(
+ new SystemInputStreamFactory(), StandardCharsets.UTF_8)) {
int sentenceNumber = 0;
- List<Mention> document = new ArrayList<Mention>();
- List<Parse> parses = new ArrayList<Parse>();
+ List<Mention> document = new ArrayList<>();
+ List<Parse> parses = new ArrayList<>();
String line;
while ((line = lineStream.read()) != null) {
@@ -148,14 +146,14 @@ public class CoreferencerTool extends BasicCmdLineTool {
Mention[] extents =
treebankLinker.getMentionFinder().getMentions(new DefaultParse(p,sentenceNumber));
//construct new parses for mentions which don't have constituents.
- for (int ei = 0, en = extents.length; ei < en;ei++) {
+ for (Mention extent : extents) {
//System.err.println("PennTreebankLiner.main: "+ei+" "+extents[ei]);
- if (extents[ei].getParse() == null) {
+ if (extent.getParse() == null) {
//not sure how to get head index, but its not used at this point.
- Parse snp = new Parse(p.getText(),extents[ei].getSpan(),"NML",1.0,0);
+ Parse snp = new Parse(p.getText(), extent.getSpan(), "NML", 1.0, 0);
p.insert(snp);
- extents[ei].setParse(new DefaultParse(snp,sentenceNumber));
+ extent.setParse(new DefaultParse(snp, sentenceNumber));
}
}
@@ -174,6 +172,7 @@ public class CoreferencerTool extends BasicCmdLineTool {
}
}
+ @Override
public String getHelp() {
return "Usage: " + CLI.CMD + " " + getName() + " model_directory < parses";
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java
index 77b1384..370e209 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/AbstractResolver.java
@@ -18,12 +18,13 @@
package opennlp.tools.coref.resolver;
import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
import opennlp.tools.coref.DiscourseEntity;
import opennlp.tools.coref.DiscourseModel;
import opennlp.tools.coref.mention.MentionContext;
import opennlp.tools.coref.mention.Parse;
-import opennlp.tools.util.CountedSet;
/**
* Default implementation of some methods in the {@link Resolver} interface.
@@ -46,7 +47,7 @@ public abstract class AbstractResolver implements Resolver {
* Debugging variable which holds statistics about mention distances
* during training.
*/
- protected CountedSet<Integer> distances;
+ protected Map<Integer, Integer> distances;
/**
* The number of sentences back this resolver should look for a referent.
@@ -56,7 +57,7 @@ public abstract class AbstractResolver implements Resolver {
public AbstractResolver(int neb) {
numEntitiesBack = neb;
showExclusions = true;
- distances = new CountedSet<Integer>();
+ distances = new HashMap<>();
}
/**
@@ -169,7 +170,14 @@ public abstract class AbstractResolver implements Resolver {
DiscourseEntity cde = dm.getEntity(ei);
MentionContext cec = cde.getLastExtent(); // candidate extent context
if (cec.getId() == mention.getId()) {
- distances.add(ei);
+ // adding counts
+ Integer count = distances.get(ei);
+ if (count == null ) {
+ distances.put(ei, 1);
+ }
+ else {
+ distances.put(ei, count + 1);
+ }
return cde;
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
index 142bab1..7439e76 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.java
@@ -17,8 +17,10 @@
package opennlp.tools.coref.resolver;
+import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
+import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
@@ -27,13 +29,14 @@ import java.util.List;
import opennlp.tools.coref.mention.MentionContext;
import opennlp.tools.coref.mention.Parse;
-import opennlp.tools.ml.maxent.GIS;
+import opennlp.tools.ml.maxent.GISModel;
+import opennlp.tools.ml.maxent.GISTrainer;
import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
+import opennlp.tools.ml.maxent.io.BinaryGISModelWriter;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.util.ObjectStreamUtils;
+import opennlp.tools.util.TrainingParameters;
/**
* Default implementation of the {@link NonReferentialResolver} interface.
@@ -43,10 +46,10 @@ public class DefaultNonReferentialResolver implements NonReferentialResolver {
private MaxentModel model;
private List<Event> events;
private boolean loadAsResource;
- private boolean debugOn = false;
- private ResolverMode mode;
- private String modelName;
- private String modelExtension = ".bin.gz";
+ private final boolean debugOn = false;
+ private final ResolverMode mode;
+ private final String modelName;
+ private final String modelExtension = ".bin.gz";
private int nonRefIndex;
public DefaultNonReferentialResolver(String projectName, String name, ResolverMode mode)
@@ -62,7 +65,10 @@ public class DefaultNonReferentialResolver implements NonReferentialResolver {
this.getClass().getResourceAsStream(modelName))).getModel();
}
else {
- model = (new SuffixSensitiveGISModelReader(new File(modelName + modelExtension))).getModel();
+ try (DataInputStream dis = new DataInputStream(
+ new BufferedInputStream(new FileInputStream(modelName + modelExtension)))) {
+ model = new BinaryGISModelReader(dis).getModel();
+ }
}
nonRefIndex = model.getIndex(MaxentResolver.SAME);
}
@@ -71,6 +77,7 @@ public class DefaultNonReferentialResolver implements NonReferentialResolver {
}
}
+ @Override
public double getNonReferentialProbability(MentionContext mention) {
List<String> features = getFeatures(mention);
double r = model.eval(features.toArray(new String[features.size()]))[nonRefIndex];
@@ -78,6 +85,7 @@ public class DefaultNonReferentialResolver implements NonReferentialResolver {
return r;
}
+ @Override
public void addEvent(MentionContext ec) {
List<String> features = getFeatures(ec);
if (-1 == ec.getId()) {
@@ -115,6 +123,7 @@ public class DefaultNonReferentialResolver implements NonReferentialResolver {
return features;
}
+ @Override
public void train() throws IOException {
if (ResolverMode.TRAIN == mode) {
System.err.println(this + " referential");
@@ -126,9 +135,13 @@ public class DefaultNonReferentialResolver implements NonReferentialResolver {
}
writer.close();
}
- new SuffixSensitiveGISModelWriter(GIS.trainModel(
- ObjectStreamUtils.createObjectStream(events),100,10),
- new File(modelName + modelExtension)).persist();
+ TrainingParameters params = TrainingParameters.defaultParams();
+ params.put(TrainingParameters.ITERATIONS_PARAM, 100);
+ params.put(TrainingParameters.CUTOFF_PARAM, 10);
+ GISTrainer trainer = new GISTrainer();
+ trainer.init(params, null);
+ GISModel trainedModel = trainer.trainModel(ObjectStreamUtils.createObjectStream(events));
+ new BinaryGISModelWriter(trainedModel, new File(modelName + modelExtension)).persist();
}
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
index 12ff359..3710608 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/resolver/MaxentResolver.java
@@ -17,23 +17,27 @@
package opennlp.tools.coref.resolver;
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
import java.io.File;
+import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Iterator;
import java.util.List;
import opennlp.tools.coref.DiscourseEntity;
import opennlp.tools.coref.DiscourseModel;
import opennlp.tools.coref.mention.MentionContext;
import opennlp.tools.coref.sim.TestSimilarityModel;
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
+import opennlp.tools.ml.maxent.GISModel;
+import opennlp.tools.ml.maxent.GISTrainer;
+import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
+import opennlp.tools.ml.maxent.io.BinaryGISModelWriter;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.util.ObjectStreamUtils;
+import opennlp.tools.util.TrainingParameters;
/**
* Provides common functionality used by classes which implement the {@link Resolver} class
@@ -118,7 +122,10 @@ public abstract class MaxentResolver extends AbstractResolver {
this.mode = mode;
this.modelName = modelDirectory + "/" + name;
if (ResolverMode.TEST == this.mode) {
- model = (new SuffixSensitiveGISModelReader(new File(modelName + modelExtension))).getModel();
+ try (DataInputStream dis = new DataInputStream(
+ new BufferedInputStream(new FileInputStream(modelName + modelExtension)))) {
+ model = new BinaryGISModelReader(dis).getModel();
+ }
sameIndex = model.getIndex(SAME);
}
else if (ResolverMode.TRAIN == this.mode) {
@@ -169,6 +176,7 @@ public abstract class MaxentResolver extends AbstractResolver {
new FixedNonReferentialResolver(nonReferentialProbability));
}
+ @Override
public DiscourseEntity resolve(MentionContext ec, DiscourseModel dm) {
DiscourseEntity de;
int ei = 0;
@@ -229,8 +237,8 @@ public abstract class MaxentResolver extends AbstractResolver {
/**
* Returns whether the specified entity satisfies the criteria for being a default referent.
- * This criteria is used to perform sample selection on the training data and to select a single
- * non-referent entity. Typically the criteria is a heuristic for a likely referent.
+ * These criteria are used to perform sample selection on the training data and to select a single
+ * non-referent entity. Typically, the criteria are heuristics for a likely referent.
* @param de The discourse entity being considered for non-reference.
* @return True if the entity should be used as a default referent, false otherwise.
*/
@@ -286,7 +294,14 @@ public abstract class MaxentResolver extends AbstractResolver {
events.add(new Event(SAME, features.toArray(new String[features.size()])));
de = cde;
//System.err.println("MaxentResolver.retain: resolved at "+ei);
- distances.add(ei);
+ // adding counts
+ Integer count = distances.get(ei);
+ if (count == null ) {
+ distances.put(ei, 1);
+ }
+ else {
+ distances.put(ei, count + 1);
+ }
}
else if (!pairedSampleSelection || (!nonReferentFound && useAsDifferentExample)) {
nonReferentFound = true;
@@ -333,14 +348,19 @@ public abstract class MaxentResolver extends AbstractResolver {
if (debugOn) {
System.err.println(this + " referential");
FileWriter writer = new FileWriter(modelName + ".events");
- for (Iterator<Event> ei = events.iterator(); ei.hasNext();) {
- Event e = ei.next();
+ for (Event e : events) {
writer.write(e.toString() + "\n");
}
writer.close();
}
- (new SuffixSensitiveGISModelWriter(GIS.trainModel(ObjectStreamUtils.createObjectStream(events),
- 100,10),new File(modelName + modelExtension))).persist();
+ TrainingParameters params = TrainingParameters.defaultParams();
+ params.put(TrainingParameters.ITERATIONS_PARAM, 100);
+ params.put(TrainingParameters.CUTOFF_PARAM, 10);
+ GISTrainer trainer = new GISTrainer();
+ trainer.init(params, null);
+ GISModel trainedModel = trainer.trainModel(ObjectStreamUtils.createObjectStream(events));
+ new BinaryGISModelWriter(trainedModel, new File(modelName + modelExtension)).persist();
+
nonReferentialResolver.train();
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
index 13e8300..2c06836 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
@@ -18,34 +18,31 @@
package opennlp.tools.coref.sim;
+import java.io.BufferedInputStream;
import java.io.BufferedReader;
+import java.io.DataInputStream;
import java.io.File;
+import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
+import java.util.HashMap;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.List;
import java.util.Set;
import opennlp.tools.coref.resolver.ResolverUtils;
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
-import opennlp.tools.ml.model.AbstractModel;
+import opennlp.tools.ml.maxent.GISModel;
+import opennlp.tools.ml.maxent.GISTrainer;
+import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
+import opennlp.tools.ml.maxent.io.BinaryGISModelWriter;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
-import opennlp.tools.util.HashList;
import opennlp.tools.util.ObjectStreamUtils;
-
-//import opennlp.maxent.GIS;
-//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
-//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
-//import opennlp.model.Event;
-//import opennlp.model.MaxentModel;
+import opennlp.tools.util.TrainingParameters;
/**
* Class which models the gender of particular mentions and the entities made up of mentions.
@@ -56,27 +53,25 @@ public class GenderModel implements TestGenderModel, TrainSimilarityModel {
private int femaleIndex;
private int neuterIndex;
- private String modelName;
- private String modelExtension = ".bin.gz";
+ private final String modelName;
+ private final String modelExtension = ".bin.gz";
private MaxentModel testModel;
private Collection<Event> events;
- private boolean debugOn = true;
+ private final boolean debugOn = true;
- private Set<String> maleNames;
- private Set<String> femaleNames;
+ private final Set<String> maleNames;
+ private final Set<String> femaleNames;
public static TestGenderModel testModel(String name) throws IOException {
- GenderModel gm = new GenderModel(name, false);
- return gm;
+ return new GenderModel(name, false);
}
public static TrainSimilarityModel trainModel(String name) throws IOException {
- GenderModel gm = new GenderModel(name, true);
- return gm;
+ return new GenderModel(name, true);
}
private Set<String> readNames(String nameFile) throws IOException {
- Set<String> names = new HashSet<String>();
+ Set<String> names = new HashSet<>();
BufferedReader nameReader = new BufferedReader(new FileReader(nameFile));
for (String line = nameReader.readLine(); line != null; line = nameReader.readLine()) {
names.add(line);
@@ -89,17 +84,16 @@ public class GenderModel implements TestGenderModel, TrainSimilarityModel {
maleNames = readNames(modelName + ".mas");
femaleNames = readNames(modelName + ".fem");
if (train) {
- events = new ArrayList<Event>();
+ events = new ArrayList<>();
}
else {
- //if (MaxentResolver.loadAsResource()) {
- // testModel = (new BinaryGISModelReader(new DataInputStream(
- // this.getClass().getResourceAsStream(modelName)))).getModel();
- //}
- testModel = (new SuffixSensitiveGISModelReader(new File(modelName + modelExtension))).getModel();
- maleIndex = testModel.getIndex(GenderEnum.MALE.toString());
- femaleIndex = testModel.getIndex(GenderEnum.FEMALE.toString());
- neuterIndex = testModel.getIndex(GenderEnum.NEUTER.toString());
+ try (DataInputStream dis = new DataInputStream(
+ new BufferedInputStream(new FileInputStream(modelName + modelExtension)))) {
+ testModel = new BinaryGISModelReader(dis).getModel();
+ maleIndex = testModel.getIndex(GenderEnum.MALE.toString());
+ femaleIndex = testModel.getIndex(GenderEnum.FEMALE.toString());
+ neuterIndex = testModel.getIndex(GenderEnum.NEUTER.toString());
+ }
}
}
@@ -168,8 +162,7 @@ public class GenderModel implements TestGenderModel, TrainSimilarityModel {
}
private GenderEnum getGender(List<Context> entity) {
- for (Iterator<Context> ci = entity.iterator(); ci.hasNext();) {
- Context ec = ci.next();
+ for (Context ec : entity) {
GenderEnum ge = getGender(ec);
if (ge != GenderEnum.UNKNOWN) {
return ge;
@@ -181,62 +174,51 @@ public class GenderModel implements TestGenderModel, TrainSimilarityModel {
@SuppressWarnings("unchecked")
public void setExtents(Context[] extentContexts) {
- HashList entities = new HashList();
- List<Context> singletons = new ArrayList<Context>();
- for (int ei = 0, el = extentContexts.length; ei < el; ei++) {
- Context ec = extentContexts[ei];
+ HashMap<Integer,Context> entities = new HashMap<>();
+ List<Context> singletons = new ArrayList<>();
+ for (Context ec : extentContexts) {
//System.err.println("GenderModel.setExtents: ec("+ec.getId()+") "+ec.toText());
if (ec.getId() != -1) {
entities.put(ec.getId(), ec);
- }
- else {
+ } else {
singletons.add(ec);
}
}
- List<Context> males = new ArrayList<Context>();
- List<Context> females = new ArrayList<Context>();
- List<Context> eunuches = new ArrayList<Context>();
+ List<Context> males = new ArrayList<>();
+ List<Context> females = new ArrayList<>();
+ List<Context> eunuches = new ArrayList<>();
//coref entities
- for (Iterator<Integer> ei = entities.keySet().iterator(); ei.hasNext();) {
- Integer key = ei.next();
+ for (Integer key : entities.keySet()) {
List<Context> entityContexts = (List<Context>) entities.get(key);
GenderEnum gender = getGender(entityContexts);
if (gender != null) {
if (gender == GenderEnum.MALE) {
males.addAll(entityContexts);
- }
- else if (gender == GenderEnum.FEMALE) {
+ } else if (gender == GenderEnum.FEMALE) {
females.addAll(entityContexts);
- }
- else if (gender == GenderEnum.NEUTER) {
+ } else if (gender == GenderEnum.NEUTER) {
eunuches.addAll(entityContexts);
}
}
}
//non-coref entities
- for (Iterator<Context> ei = singletons.iterator(); ei.hasNext();) {
- Context ec = ei.next();
+ for (Context ec : singletons) {
GenderEnum gender = getGender(ec);
if (gender == GenderEnum.MALE) {
males.add(ec);
- }
- else if (gender == GenderEnum.FEMALE) {
+ } else if (gender == GenderEnum.FEMALE) {
females.add(ec);
- }
- else if (gender == GenderEnum.NEUTER) {
+ } else if (gender == GenderEnum.NEUTER) {
eunuches.add(ec);
}
}
- for (Iterator<Context> mi = males.iterator(); mi.hasNext();) {
- Context ec = mi.next();
+ for (Context ec : males) {
addEvent(GenderEnum.MALE.toString(), ec);
}
- for (Iterator<Context> fi = females.iterator(); fi.hasNext();) {
- Context ec = fi.next();
+ for (Context ec : females) {
addEvent(GenderEnum.FEMALE.toString(), ec);
}
- for (Iterator<Context> ei = eunuches.iterator(); ei.hasNext();) {
- Context ec = ei.next();
+ for (Context ec : eunuches) {
addEvent(GenderEnum.NEUTER.toString(), ec);
}
}
@@ -259,38 +241,40 @@ public class GenderModel implements TestGenderModel, TrainSimilarityModel {
}
}
+ @Override
public double[] genderDistribution(Context np1) {
List<String> features = getFeatures(np1);
- if (debugOn) {
- //System.err.println("GenderModel.genderDistribution: "+features);
- }
+ //System.err.println("GenderModel.genderDistribution: "+features);
return testModel.eval(features.toArray(new String[features.size()]));
}
+ @Override
public void trainModel() throws IOException {
if (debugOn) {
FileWriter writer = new FileWriter(modelName + ".events");
- for (Iterator<Event> ei = events.iterator();ei.hasNext();) {
- Event e = ei.next();
+ for (Event e : events) {
writer.write(e.toString() + "\n");
}
writer.close();
}
-
- new SuffixSensitiveGISModelWriter(
- // GIS.trainModel((EventStream)new CollectionEventStream(events), true)).persist();
- (AbstractModel) GIS.trainModel(ObjectStreamUtils.createObjectStream(events), true),
- new File(modelName + modelExtension)).persist();
+ GISTrainer trainer = new GISTrainer();
+ trainer.init(TrainingParameters.defaultParams(), null);
+ trainer.setSmoothing(true);
+ GISModel trainedModel = trainer.trainModel(ObjectStreamUtils.createObjectStream(events));
+ new BinaryGISModelWriter(trainedModel, new File(modelName + modelExtension)).persist();
}
+ @Override
public int getFemaleIndex() {
return femaleIndex;
}
+ @Override
public int getMaleIndex() {
return maleIndex;
}
+ @Override
public int getNeuterIndex() {
return neuterIndex;
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
index 6f3be6d..fa8070a 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
@@ -17,34 +17,33 @@
package opennlp.tools.coref.sim;
+import java.io.BufferedInputStream;
+import java.io.DataInputStream;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Iterator;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import opennlp.tools.coref.resolver.ResolverUtils;
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
+import opennlp.tools.ml.maxent.GISModel;
+import opennlp.tools.ml.maxent.GISTrainer;
+import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
+import opennlp.tools.ml.maxent.io.BinaryGISModelWriter;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
-import opennlp.tools.util.HashList;
import opennlp.tools.util.ObjectStreamUtils;
-
-//import opennlp.maxent.GIS;
-//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
-//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
-//import opennlp.model.Event;
-//import opennlp.model.MaxentModel;
+import opennlp.tools.util.TrainingParameters;
/**
* Class which models the number of particular mentions and the entities made up of mentions.
*/
public class NumberModel implements TestNumberModel, TrainSimilarityModel {
- private String modelName;
- private String modelExtension = ".bin.gz";
+ private final String modelName;
+ private final String modelExtension = ".bin.gz";
private MaxentModel testModel;
private List<Event> events;
@@ -52,13 +51,11 @@ public class NumberModel implements TestNumberModel, TrainSimilarityModel {
private int pluralIndex;
public static TestNumberModel testModel(String name) throws IOException {
- NumberModel nm = new NumberModel(name, false);
- return nm;
+ return new NumberModel(name, false);
}
public static TrainSimilarityModel trainModel(String modelName) throws IOException {
- NumberModel gm = new NumberModel(modelName, true);
- return gm;
+ return new NumberModel(modelName, true);
}
private NumberModel(String modelName, boolean train) throws IOException {
@@ -67,18 +64,17 @@ public class NumberModel implements TestNumberModel, TrainSimilarityModel {
events = new ArrayList<Event>();
}
else {
- //if (MaxentResolver.loadAsResource()) {
- // testModel = (new PlainTextGISModelReader(new BufferedReader(new InputStreamReader(
- // this.getClass().getResourceAsStream(modelName))))).getModel();
- //}
- testModel = (new SuffixSensitiveGISModelReader(new File(modelName + modelExtension))).getModel();
+ try (DataInputStream dis = new DataInputStream(
+ new BufferedInputStream(new FileInputStream(modelName + modelExtension)))) {
+ testModel = new BinaryGISModelReader(dis).getModel();
+ }
singularIndex = testModel.getIndex(NumberEnum.SINGULAR.toString());
pluralIndex = testModel.getIndex(NumberEnum.PLURAL.toString());
}
}
private List<String> getFeatures(Context np1) {
- List<String> features = new ArrayList<String>();
+ List<String> features = new ArrayList<>();
features.add("default");
Object[] npTokens = np1.getTokens();
for (int ti = 0, tl = npTokens.length - 1; ti < tl; ti++) {
@@ -107,8 +103,7 @@ public class NumberModel implements TestNumberModel, TrainSimilarityModel {
}
private NumberEnum getNumber(List<Context> entity) {
- for (Iterator<Context> ci = entity.iterator(); ci.hasNext();) {
- Context ec = ci.next();
+ for (Context ec : entity) {
NumberEnum ne = getNumber(ec);
if (ne != NumberEnum.UNKNOWN) {
return ne;
@@ -117,10 +112,11 @@ public class NumberModel implements TestNumberModel, TrainSimilarityModel {
return NumberEnum.UNKNOWN;
}
+ @Override
@SuppressWarnings("unchecked")
public void setExtents(Context[] extentContexts) {
- HashList entities = new HashList();
- List<Context> singletons = new ArrayList<Context>();
+ Map<Integer,Context> entities = new HashMap<>();
+ List<Context> singletons = new ArrayList<>();
for (int ei = 0, el = extentContexts.length; ei < el; ei++) {
Context ec = extentContexts[ei];
//System.err.println("NumberModel.setExtents: ec("+ec.getId()+") "+ec.toText());
@@ -131,58 +127,60 @@ public class NumberModel implements TestNumberModel, TrainSimilarityModel {
singletons.add(ec);
}
}
- List<Context> singles = new ArrayList<Context>();
- List<Context> plurals = new ArrayList<Context>();
+ List<Context> singles = new ArrayList<>();
+ List<Context> plurals = new ArrayList<>();
// coref entities
- for (Iterator<Integer> ei = entities.keySet().iterator(); ei.hasNext();) {
- Integer key = ei.next();
+ for (Integer key : entities.keySet()) {
List<Context> entityContexts = (List<Context>) entities.get(key);
NumberEnum number = getNumber(entityContexts);
if (number == NumberEnum.SINGULAR) {
singles.addAll(entityContexts);
- }
- else if (number == NumberEnum.PLURAL) {
+ } else if (number == NumberEnum.PLURAL) {
plurals.addAll(entityContexts);
}
}
// non-coref entities.
- for (Iterator<Context> ei = singletons.iterator(); ei.hasNext();) {
- Context ec = ei.next();
+ for (Context ec : singletons) {
NumberEnum number = getNumber(ec);
if (number == NumberEnum.SINGULAR) {
singles.add(ec);
- }
- else if (number == NumberEnum.PLURAL) {
+ } else if (number == NumberEnum.PLURAL) {
plurals.add(ec);
}
}
- for (Iterator<Context> si = singles.iterator(); si.hasNext();) {
- Context ec = si.next();
+ for (Context ec : singles) {
addEvent(NumberEnum.SINGULAR.toString(), ec);
}
- for (Iterator<Context> fi = plurals.iterator(); fi.hasNext();) {
- Context ec = fi.next();
- addEvent(NumberEnum.PLURAL.toString(),ec);
+ for (Context ec : plurals) {
+ addEvent(NumberEnum.PLURAL.toString(), ec);
}
}
+ @Override
public double[] numberDist(Context c) {
List<String> feats = getFeatures(c);
return testModel.eval(feats.toArray(new String[feats.size()]));
}
+ @Override
public int getSingularIndex() {
return singularIndex;
}
+ @Override
public int getPluralIndex() {
return pluralIndex;
}
+ @Override
public void trainModel() throws IOException {
- new SuffixSensitiveGISModelWriter(GIS.trainModel(
- ObjectStreamUtils.createObjectStream(events),100,10),
- new File(modelName + modelExtension)).persist();
+ TrainingParameters params = TrainingParameters.defaultParams();
+ params.put(TrainingParameters.ITERATIONS_PARAM, 100);
+ params.put(TrainingParameters.CUTOFF_PARAM, 10);
+ GISTrainer trainer = new GISTrainer();
+ trainer.init(params, null);
+ GISModel trainedModel = trainer.trainModel(ObjectStreamUtils.createObjectStream(events));
+ new BinaryGISModelWriter(trainedModel, new File(modelName + modelExtension)).persist();
}
}
diff --git a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
index e54c427..8bf468c 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
@@ -17,27 +17,30 @@
package opennlp.tools.coref.sim;
+import java.io.BufferedInputStream;
import java.io.BufferedReader;
+import java.io.DataInputStream;
import java.io.File;
+import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import opennlp.tools.coref.resolver.ResolverUtils;
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
-import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
+import opennlp.tools.ml.maxent.GISModel;
+import opennlp.tools.ml.maxent.GISTrainer;
+import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
+import opennlp.tools.ml.maxent.io.BinaryGISModelWriter;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
-import opennlp.tools.util.HashList;
import opennlp.tools.util.ObjectStreamUtils;
+import opennlp.tools.util.TrainingParameters;
/**
* Models semantic similarity between two mentions and returns a score based on
@@ -45,31 +48,33 @@ import opennlp.tools.util.ObjectStreamUtils;
*/
public class SimilarityModel implements TestSimilarityModel, TrainSimilarityModel {
- private String modelName;
- private String modelExtension = ".bin.gz";
+ private final String modelName;
+ private final String modelExtension = ".bin.gz";
private MaxentModel testModel;
private List<Event> events;
private int SAME_INDEX;
private static final String SAME = "same";
private static final String DIFF = "diff";
- private boolean debugOn = false;
+ private final boolean debugOn = false;
public static TestSimilarityModel testModel(String name) throws IOException {
return new SimilarityModel(name, false);
}
public static TrainSimilarityModel trainModel(String name) throws IOException {
- SimilarityModel sm = new SimilarityModel(name, true);
- return sm;
+ return new SimilarityModel(name, true);
}
private SimilarityModel(String modelName, boolean train) throws IOException {
this.modelName = modelName;
if (train) {
- events = new ArrayList<Event>();
+ events = new ArrayList<>();
}
else {
- testModel = (new SuffixSensitiveGISModelReader(new File(modelName + modelExtension))).getModel();
+ try (DataInputStream dis = new DataInputStream(
+ new BufferedInputStream(new FileInputStream(modelName + modelExtension)))) {
+ testModel = new BinaryGISModelReader(dis).getModel();
+ }
SAME_INDEX = testModel.getIndex(SAME);
}
}
@@ -98,16 +103,15 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
*/
private Set<String> constructHeadSet(List<Context> mentions) {
Set<String> headSet = new HashSet<String>();
- for (Iterator<Context> ei = mentions.iterator(); ei.hasNext();) {
- Context ec = ei.next();
+ for (Context ec : mentions) {
headSet.add(ec.getHeadTokenText().toLowerCase());
}
return headSet;
}
private boolean hasSameHead(Set<String> entityHeadSet, Set<String> candidateHeadSet) {
- for (Iterator<String> hi = entityHeadSet.iterator(); hi.hasNext();) {
- if (candidateHeadSet.contains(hi.next())) {
+ for (String s : entityHeadSet) {
+ if (candidateHeadSet.contains(s)) {
return true;
}
}
@@ -115,8 +119,8 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
}
private boolean hasSameNameType(Set<String> entityNameSet, Set<String> candidateNameSet) {
- for (Iterator<String> hi = entityNameSet.iterator(); hi.hasNext();) {
- if (candidateNameSet.contains(hi.next())) {
+ for (String s : entityNameSet) {
+ if (candidateNameSet.contains(s)) {
return true;
}
}
@@ -124,10 +128,9 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
}
private boolean hasSuperClass(List<Context> entityContexts, List<Context> candidateContexts) {
- for (Iterator<Context> ei = entityContexts.iterator(); ei.hasNext();) {
- Context ec = ei.next();
- for (Iterator<Context> cei = candidateContexts.iterator(); cei.hasNext();) {
- if (inSuperClass(ec, cei.next())) {
+ for (Context ec : entityContexts) {
+ for (Context candidateContext : candidateContexts) {
+ if (inSuperClass(ec, candidateContext)) {
return true;
}
}
@@ -149,48 +152,39 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
* with entity indicated by the specified key.
*/
@SuppressWarnings("unchecked")
- private Set<Context> constructExclusionSet(Integer entityKey, HashList entities, Map<Integer,
+ private Set<Context> constructExclusionSet(Integer entityKey, Map<Integer, Context> entities, Map<Integer,
Set<String>> headSets, Map<Integer, Set<String>> nameSets, List<Context> singletons) {
- Set<Context> exclusionSet = new HashSet<Context>();
+ Set<Context> exclusionSet = new HashSet<>();
Set<String> entityHeadSet = headSets.get(entityKey);
Set<String> entityNameSet = nameSets.get(entityKey);
List<Context> entityContexts = (List<Context>) entities.get(entityKey);
//entities
- for (Iterator<Integer> ei = entities.keySet().iterator(); ei.hasNext();) {
- Integer key = ei.next();
+ for (Integer key : entities.keySet()) {
List<Context> candidateContexts = (List<Context>) entities.get(key);
if (key.equals(entityKey)) {
exclusionSet.addAll(candidateContexts);
- }
- else if (nameSets.get(key).isEmpty()) {
+ } else if (nameSets.get(key).isEmpty()) {
exclusionSet.addAll(candidateContexts);
- }
- else if (hasSameHead(entityHeadSet, headSets.get(key))) {
+ } else if (hasSameHead(entityHeadSet, headSets.get(key))) {
exclusionSet.addAll(candidateContexts);
- }
- else if (hasSameNameType(entityNameSet, nameSets.get(key))) {
+ } else if (hasSameNameType(entityNameSet, nameSets.get(key))) {
exclusionSet.addAll(candidateContexts);
- }
- else if (hasSuperClass(entityContexts, candidateContexts)) {
+ } else if (hasSuperClass(entityContexts, candidateContexts)) {
exclusionSet.addAll(candidateContexts);
}
}
//singles
- List<Context> singles = new ArrayList<Context>(1);
- for (Iterator<Context> si = singletons.iterator(); si.hasNext();) {
- Context sc = si.next();
+ List<Context> singles = new ArrayList<>(1);
+ for (Context sc : singletons) {
singles.clear();
singles.add(sc);
if (entityHeadSet.contains(sc.getHeadTokenText().toLowerCase())) {
exclusionSet.add(sc);
- }
- else if (sc.getNameType() == null) {
+ } else if (sc.getNameType() == null) {
exclusionSet.add(sc);
- }
- else if (entityNameSet.contains(sc.getNameType())) {
+ } else if (entityNameSet.contains(sc.getNameType())) {
exclusionSet.add(sc);
- }
- else if (hasSuperClass(entityContexts, singles)) {
+ } else if (hasSuperClass(entityContexts, singles)) {
exclusionSet.add(sc);
}
}
@@ -206,10 +200,9 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
* generated from the mentions associated with that key.
*/
@SuppressWarnings("unchecked")
- private Map<Integer, Set<String>> constructHeadSets(HashList entities) {
- Map<Integer, Set<String>> headSets = new HashMap<Integer, Set<String>>();
- for (Iterator<Integer> ei = entities.keySet().iterator(); ei.hasNext();) {
- Integer key = ei.next();
+ private Map<Integer, Set<String>> constructHeadSets(Map<Integer, Context> entities) {
+ Map<Integer, Set<String>> headSets = new HashMap<>();
+ for (Integer key : entities.keySet()) {
List<Context> entityContexts = (List<Context>) entities.get(key);
headSets.put(key, constructHeadSet(entityContexts));
}
@@ -221,12 +214,11 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
*
* @param mentions A list of mentions.
*
- * @return A set set of name types assigned to the specified mentions.
+ * @return A set of name types assigned to the specified mentions.
*/
private Set<String> constructNameSet(List<Context> mentions) {
- Set<String> nameSet = new HashSet<String>();
- for (Iterator<Context> ei = mentions.iterator(); ei.hasNext();) {
- Context ec = ei.next();
+ Set<String> nameSet = new HashSet<>();
+ for (Context ec : mentions) {
if (ec.getNameType() != null) {
nameSet.add(ec.getNameType());
}
@@ -243,10 +235,9 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
* with the each mention of that entity.
*/
@SuppressWarnings("unchecked")
- private Map<Integer, Set<String>> constructNameSets(HashList entities) {
- Map<Integer, Set<String>> nameSets = new HashMap<Integer, Set<String>>();
- for (Iterator<Integer> ei = entities.keySet().iterator(); ei.hasNext();) {
- Integer key = ei.next();
+ private Map<Integer, Set<String>> constructNameSets(Map<Integer, Context> entities) {
+ Map<Integer, Set<String>> nameSets = new HashMap<>();
+ for (Integer key : entities.keySet()) {
List<Context> entityContexts = (List<Context>) entities.get(key);
nameSets.put(key, constructNameSet(entityContexts));
}
@@ -259,8 +250,7 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
}
else {
int numCommonSynsets = 0;
- for (Iterator<String> si = ec.getSynsets().iterator(); si.hasNext();) {
- String synset = si.next();
+ for (String synset : ec.getSynsets()) {
if (cec.getSynsets().contains(synset)) {
numCommonSynsets++;
}
@@ -283,20 +273,19 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
}
*/
+ @Override
@SuppressWarnings("unchecked")
public void setExtents(Context[] extentContexts) {
- HashList entities = new HashList();
- /** Extents which are not in a coreference chain. */
- List<Context> singletons = new ArrayList<Context>();
- List<Context> allExtents = new ArrayList<Context>();
+ Map<Integer, Context> entities = new HashMap<>();
+ /* Extents which are not in a coreference chain. */
+ List<Context> singletons = new ArrayList<>();
+ List<Context> allExtents = new ArrayList<>();
//populate data structures
- for (int ei = 0, el = extentContexts.length; ei < el; ei++) {
- Context ec = extentContexts[ei];
+ for (Context ec : extentContexts) {
//System.err.println("SimilarityModel: setExtents: ec("+ec.getId()+") "+ec.getNameType()+" "+ec);
if (ec.getId() == -1) {
singletons.add(ec);
- }
- else {
+ } else {
entities.put(ec.getId(), ec);
}
allExtents.add(ec);
@@ -306,8 +295,7 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
Map<Integer, Set<String>> headSets = constructHeadSets(entities);
Map<Integer, Set<String>> nameSets = constructNameSets(entities);
- for (Iterator<Integer> ei = entities.keySet().iterator(); ei.hasNext();) {
- Integer key = ei.next();
+ for (Integer key : entities.keySet()) {
Set<String> entityNameSet = nameSets.get(key);
if (entityNameSet.isEmpty()) {
continue;
@@ -333,7 +321,7 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
axi = (axi + 1) % allExtents.size();
if (!exclusionSet.contains(sec1)) {
if (debugOn) System.err.println(ec1.toString() + " " + entityNameSet + " "
- + sec1.toString() + " " + nameSets.get(sec1.getId()));
+ + sec1.toString() + " " + nameSets.get(sec1.getId()));
addEvent(false, ec1, sec1);
break;
}
@@ -354,6 +342,7 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
* @return a number between 0 and 1 which represents the models belief that the specified
* mentions are compatible.
*/
+ @Override
public double compatible(Context mention1, Context mention2) {
List<String> feats = getFeatures(mention1, mention2);
if (debugOn) System.err.println("SimilarityModel.compatible: feats=" + feats);
@@ -364,18 +353,22 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
* Train a model based on the previously supplied evidence.
* @see #setExtents(Context[])
*/
+ @Override
public void trainModel() throws IOException {
if (debugOn) {
FileWriter writer = new FileWriter(modelName + ".events");
- for (Iterator<Event> ei = events.iterator();ei.hasNext();) {
- Event e = ei.next();
+ for (Event e : events) {
writer.write(e.toString() + "\n");
}
writer.close();
}
- new SuffixSensitiveGISModelWriter(GIS.trainModel(
- ObjectStreamUtils.createObjectStream(events),100,10),
- new File(modelName + modelExtension)).persist();
+ TrainingParameters params = TrainingParameters.defaultParams();
+ params.put(TrainingParameters.ITERATIONS_PARAM, 100);
+ params.put(TrainingParameters.CUTOFF_PARAM, 10);
+ GISTrainer trainer = new GISTrainer();
+ trainer.init(params, null);
+ GISModel trainedModel = trainer.trainModel(ObjectStreamUtils.createObjectStream(events));
+ new BinaryGISModelWriter(trainedModel, new File(modelName + modelExtension)).persist();
}
private boolean isName(Context np) {
@@ -399,8 +392,8 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
List<String> features = new ArrayList<>(2 + synsets.size());
features.add("nn=" + name.getNameType() + "," + common.getNameType());
features.add("nw=" + name.getNameType() + "," + common.getHeadTokenText().toLowerCase());
- for (Iterator<String> si = synsets.iterator(); si.hasNext();) {
- features.add("ns=" + name.getNameType() + "," + si.next());
+ for (String synset : synsets) {
+ features.add("ns=" + name.getNameType() + "," + synset);
}
if (name.getNameType() == null) {
//features.addAll(getCommonCommonFeatures(name,common));
@@ -409,14 +402,14 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
}
private List<String> getNameNumberFeatures(Context name, Context number) {
- List<String> features = new ArrayList<String>(2);
+ List<String> features = new ArrayList<>(2);
features.add("nt=" + name.getNameType() + "," + number.getHeadTokenTag());
features.add("nn=" + name.getNameType() + "," + number.getNameType());
return features;
}
private List<String> getNamePronounFeatures(Context name, Context pronoun) {
- List<String> features = new ArrayList<String>(2);
+ List<String> features = new ArrayList<>(2);
features.add("nw=" + name.getNameType() + "," + pronoun.getHeadTokenText().toLowerCase());
features.add("ng=" + name.getNameType() + "," + ResolverUtils.getPronounGender(
pronoun.getHeadTokenText().toLowerCase()));
@@ -424,13 +417,12 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
}
private List<String> getCommonPronounFeatures(Context common, Context pronoun) {
- List<String> features = new ArrayList<String>();
+ List<String> features = new ArrayList<>();
Set<String> synsets1 = common.getSynsets();
String p = pronoun.getHeadTokenText().toLowerCase();
String gen = ResolverUtils.getPronounGender(p);
features.add("wn=" + p + "," + common.getNameType());
- for (Iterator<String> si = synsets1.iterator(); si.hasNext();) {
- String synset = si.next();
+ for (String synset : synsets1) {
features.add("ws=" + p + "," + synset);
features.add("gs=" + gen + "," + synset);
}
@@ -438,10 +430,9 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
}
private List<String> getCommonNumberFeatures(Context common, Context number) {
- List<String> features = new ArrayList<String>();
+ List<String> features = new ArrayList<>();
Set<String> synsets1 = common.getSynsets();
- for (Iterator<String> si = synsets1.iterator(); si.hasNext();) {
- String synset = si.next();
+ for (String synset : synsets1) {
features.add("ts=" + number.getHeadTokenTag() + "," + synset);
features.add("ns=" + number.getNameType() + "," + synset);
}
@@ -450,7 +441,7 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
}
private List<String> getNumberPronounFeatures(Context number, Context pronoun) {
- List<String> features = new ArrayList<String>();
+ List<String> features = new ArrayList<>();
String p = pronoun.getHeadTokenText().toLowerCase();
String gen = ResolverUtils.getPronounGender(p);
features.add("wt=" + p + "," + number.getHeadTokenTag());
@@ -461,7 +452,7 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
}
private List<String> getNameNameFeatures(Context name1, Context name2) {
- List<String> features = new ArrayList<String>(1);
+ List<String> features = new ArrayList<>(1);
if (name1.getNameType() == null && name2.getNameType() == null) {
features.add("nn=" + name1.getNameType() + "," + name2.getNameType());
//features.addAll(getCommonCommonFeatures(name1,name2));
@@ -489,7 +480,7 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
}
private List<String> getCommonCommonFeatures(Context common1, Context common2) {
- List<String> features = new ArrayList<String>();
+ List<String> features = new ArrayList<>();
Set<String> synsets1 = common1.getSynsets();
Set<String> synsets2 = common2.getSynsets();
@@ -502,8 +493,7 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
return features;
}
int numCommonSynsets = 0;
- for (Iterator<String> si = synsets1.iterator(); si.hasNext();) {
- String synset = si.next();
+ for (String synset : synsets1) {
if (synsets2.contains(synset)) {
features.add("ss=" + synset);
numCommonSynsets++;
@@ -527,7 +517,7 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
}
private List<String> getPronounPronounFeatures(Context pronoun1, Context pronoun2) {
- List<String> features = new ArrayList<String>();
+ List<String> features = new ArrayList<>();
String g1 = ResolverUtils.getPronounGender(pronoun1.getHeadTokenText());
String g2 = ResolverUtils.getPronounGender(pronoun2.getHeadTokenText());
if (g1.equals(g2)) {
@@ -540,7 +530,7 @@ public class SimilarityModel implements TestSimilarityModel, TrainSimilarityMode
}
private List<String> getFeatures(Context np1, Context np2) {
- List<String> features = new ArrayList<String>();
+ List<String> features = new ArrayList<>();
features.add("default");
// semantic categories
String w1 = np1.getHeadTokenText().toLowerCase();
diff --git a/opennlp-coref/src/main/java/opennlp/tools/formats/CorefSampleStreamFactory.java b/opennlp-coref/src/main/java/opennlp/tools/formats/CorefSampleStreamFactory.java
index 2dbbf74..9d4895d 100644
--- a/opennlp-coref/src/main/java/opennlp/tools/formats/CorefSampleStreamFactory.java
+++ b/opennlp-coref/src/main/java/opennlp/tools/formats/CorefSampleStreamFactory.java
@@ -18,6 +18,7 @@
package opennlp.tools.formats;
import java.io.FileInputStream;
+import java.io.IOException;
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.CmdLineUtil;
@@ -25,6 +26,7 @@ import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.params.BasicFormatParams;
import opennlp.tools.coref.CorefSample;
import opennlp.tools.coref.CorefSampleDataStream;
+import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ParagraphStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -42,16 +44,19 @@ public class CorefSampleStreamFactory extends AbstractSampleStreamFactory<CorefS
StreamFactoryRegistry.registerFactory(CorefSample.class,
StreamFactoryRegistry.DEFAULT_FORMAT, new CorefSampleStreamFactory());
}
-
+
+ @Override
public ObjectStream<CorefSample> create(String[] args) {
Parameters params = ArgumentParser.parse(args, Parameters.class);
CmdLineUtil.checkInputFile("Data", params.getData());
- FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
-
- ObjectStream<String> lineStream = new ParagraphStream(new PlainTextByLineStream(sampleDataIn
- .getChannel(), params.getEncoding()));
-
- return new CorefSampleDataStream(lineStream);
+ try {
+ MarkableFileInputStreamFactory factory = new MarkableFileInputStreamFactory(params.getData());
+ ObjectStream<String> lineStream = new ParagraphStream(new PlainTextByLineStream(
+ factory, params.getEncoding()));
+ return new CorefSampleDataStream(lineStream);
+ } catch (IOException e) {
+ throw new RuntimeException("Error loading input data from parameters!", e);
+ }
}
}