You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@opennlp.apache.org by Mark G <gi...@gmail.com> on 2013/12/04 12:49:17 UTC
Re: svn commit: r1544904 - in /opennlp/sandbox/opennlp-coref: ./
src/main/java/opennlp/tools/coref/ src/main/java/opennlp/tools/coref/resolver/
src/main/java/opennlp/tools/coref/sim/
I have a lot of data lying around. How do I train it?
On Mon, Nov 25, 2013 at 3:02 PM, Jörn Kottmann <ko...@gmail.com> wrote:
> Actually that code should have compiled just fine against maxent 3.0.3.
>
> Anyway, the reason for the separation from opennlp-tools is that we need
> to first build/finish the tooling
> to train the coref component. In my opinion this will be easier if we just
> let the code continue to use the old
> maxent library. After that is accomplished we could start updating and
> refactoring it and re-integrate it into opennlp-tools.
>
> Do you have some data sets you could train it on? I am happy to provide
> assistance and point out issues I encountered.
>
> Jörn
>
>
> On 11/24/2013 04:08 AM, markg@apache.org wrote:
>
>> Author: markg
>> Date: Sun Nov 24 03:08:54 2013
>> New Revision: 1544904
>>
>> URL: http://svn.apache.org/r1544904
>> Log:
>> OPENNLP-621
>> Fixed errors and changed all appropriate imports to opennlp.tools.ml.
>> Builds but no testing done yet.
>>
>> Modified:
>> opennlp/sandbox/opennlp-coref/ (props changed)
>> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/CorefModel.java
>> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/DefaultNonReferentialResolver.java
>> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/MaxentResolver.java
>> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/sim/GenderModel.java
>> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/sim/NumberModel.java
>> opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/sim/SimilarityModel.java
>>
>> Propchange: opennlp/sandbox/opennlp-coref/
>> ------------------------------------------------------------
>> ------------------
>> --- svn:ignore (added)
>> +++ svn:ignore Sun Nov 24 03:08:54 2013
>> @@ -0,0 +1 @@
>> +target
>>
>> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/CorefModel.java
>> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/
>> src/main/java/opennlp/tools/coref/CorefModel.java?rev=
>> 1544904&r1=1544903&r2=1544904&view=diff
>> ============================================================
>> ==================
>> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/CorefModel.java
>> (original)
>> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/CorefModel.java
>> Sun Nov 24 03:08:54 2013
>> @@ -26,9 +26,10 @@ import java.io.FileOutputStream;
>> import java.io.FileReader;
>> import java.io.IOException;
>> import java.util.zip.GZIPInputStream;
>> -
>> -import opennlp.maxent.io.BinaryGISModelReader;
>> -import opennlp.model.AbstractModel;
>> +import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
>> +//import opennlp.maxent.io.BinaryGISModelReader;
>> +//import opennlp.model.AbstractModel;
>> +import opennlp.tools.ml.model.AbstractModel;
>> import opennlp.tools.dictionary.Dictionary;
>> import opennlp.tools.util.StringList;
>> import opennlp.tools.util.model.BaseModel;
>>
>> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/DefaultNonReferentialResolver.java
>> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/
>> src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.
>> java?rev=1544904&r1=1544903&r2=1544904&view=diff
>> ============================================================
>> ==================
>> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/DefaultNonReferentialResolver.java (original)
>> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/DefaultNonReferentialResolver.java Sun Nov 24 03:08:54
>> 2013
>> @@ -25,14 +25,26 @@ import java.util.ArrayList;
>> import java.util.Iterator;
>> import java.util.List;
>> -import opennlp.maxent.GIS;
>> -import opennlp.maxent.io.BinaryGISModelReader;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> -import opennlp.model.Event;
>> -import opennlp.model.MaxentModel;
>> +//import opennlp.maxent.GIS;
>> +//import opennlp.maxent.io.BinaryGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +//import opennlp.maxent.GIS;
>> +import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
>> +import opennlp.tools.ml.maxent.GIS;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +//import opennlp.model.Event;
>> +import opennlp.tools.ml.model.MaxentModel;
>> +//import opennlp.model.MaxentModel;
>> +
>> +import opennlp.tools.ml.model.EventStream;
>> +//import opennlp.model.MaxentModel;
>> import opennlp.tools.coref.mention.MentionContext;
>> import opennlp.tools.coref.mention.Parse;
>> +import opennlp.tools.ml.model.Event;
>> import opennlp.tools.util.CollectionEventStream;
>> /**
>> @@ -124,7 +136,7 @@ public class DefaultNonReferentialResolv
>> }
>> writer.close();
>> }
>> - (new SuffixSensitiveGISModelWriter(GIS.trainModel(new
>> CollectionEventStream(events),100,10),new File(modelName+modelExtension)
>> )).persist();
>> + (new SuffixSensitiveGISModelWriter(GIS.trainModel((EventStream)new
>> CollectionEventStream(events),100,10),new File(modelName+modelExtension)
>> )).persist();
>> }
>> }
>> }
>>
>> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/MaxentResolver.java
>> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/
>> src/main/java/opennlp/tools/coref/resolver/MaxentResolver.
>> java?rev=1544904&r1=1544903&r2=1544904&view=diff
>> ============================================================
>> ==================
>> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/MaxentResolver.java (original)
>> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/MaxentResolver.java Sun Nov 24 03:08:54 2013
>> @@ -24,15 +24,28 @@ import java.util.ArrayList;
>> import java.util.Iterator;
>> import java.util.List;
>> -import opennlp.maxent.GIS;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> -import opennlp.model.Event;
>> -import opennlp.model.MaxentModel;
>> +//import opennlp.maxent.GIS;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +//import opennlp.model.EventStream;
>> +//import opennlp.model.MaxentModel;
>> +
>> +
>> +import opennlp.tools.ml.maxent.GIS;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.GIS;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +//import opennlp.model.Event;
>> +//import opennlp.model.MaxentModel;
>> +import opennlp.tools.ml.model.MaxentModel;
>> +import opennlp.tools.ml.model.EventStream;
>> import opennlp.tools.coref.DiscourseEntity;
>> import opennlp.tools.coref.DiscourseModel;
>> import opennlp.tools.coref.mention.MentionContext;
>> import opennlp.tools.coref.sim.TestSimilarityModel;
>> +import opennlp.tools.ml.model.Event;
>> import opennlp.tools.util.CollectionEventStream;
>> /**
>> @@ -55,7 +68,7 @@ public abstract class MaxentResolver ext
>> private double[] candProbs;
>> private int sameIndex;
>> private ResolverMode mode;
>> - private List<Event> events;
>> + private List<opennlp.tools.ml.model.Event> events;
>> /** When true, this designates that the resolver should use the
>> first referent encountered which it
>> * more preferable than non-reference. When false all non-excluded
>> referents within this resolvers range
>> @@ -314,7 +327,7 @@ public abstract class MaxentResolver ext
>> }
>> writer.close();
>> }
>> - (new SuffixSensitiveGISModelWriter(GIS.trainModel(new
>> CollectionEventStream(events),100,10),new File(modelName+modelExtension)
>> )).persist();
>> + (new SuffixSensitiveGISModelWriter(GIS.trainModel((EventStream)new
>> CollectionEventStream(events),100,10),new File(modelName+modelExtension)
>> )).persist();
>> nonReferentialResolver.train();
>> }
>> }
>>
>> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/sim/GenderModel.java
>> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/
>> src/main/java/opennlp/tools/coref/sim/GenderModel.java?
>> rev=1544904&r1=1544903&r2=1544904&view=diff
>> ============================================================
>> ==================
>> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
>> (original)
>> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
>> Sun Nov 24 03:08:54 2013
>> @@ -25,17 +25,26 @@ import java.io.FileWriter;
>> import java.io.IOException;
>> import java.io.InputStreamReader;
>> import java.util.ArrayList;
>> +import java.util.Collection;
>> import java.util.HashSet;
>> import java.util.Iterator;
>> import java.util.List;
>> import java.util.Set;
>> -import opennlp.maxent.GIS;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> -import opennlp.model.Event;
>> -import opennlp.model.MaxentModel;
>> +//import opennlp.maxent.GIS;
>> +import opennlp.tools.ml.maxent.GIS;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +//import opennlp.model.Event;
>> +import opennlp.tools.ml.model.Event;
>> +import opennlp.tools.ml.model.MaxentModel;
>> +//import opennlp.model.MaxentModel;
>> import opennlp.tools.coref.resolver.ResolverUtils;
>> +import opennlp.tools.ml.model.AbstractModel;
>> +
>> +import opennlp.tools.ml.model.EventStream;
>> import opennlp.tools.util.CollectionEventStream;
>> import opennlp.tools.util.HashList;
>> @@ -51,7 +60,7 @@ public class GenderModel implements Test
>> private String modelName;
>> private String modelExtension = ".bin.gz";
>> private MaxentModel testModel;
>> - private List<Event> events;
>> + private Collection<Event> events;
>> private boolean debugOn = true;
>> private Set<String> maleNames;
>> @@ -267,9 +276,8 @@ public class GenderModel implements Test
>> writer.close();
>> }
>> new SuffixSensitiveGISModelWriter(
>> - GIS.trainModel(
>> - new CollectionEventStream(events), true),
>> - new File(modelName+modelExtension)).persist();
>> + // GIS.trainModel((EventStream)new
>> CollectionEventStream(events), true)).persist();
>> + (AbstractModel) GIS.trainModel((EventStream)new
>> CollectionEventStream(events), true), new File(modelName+modelExtension)
>> ).persist();
>> }
>> public int getFemaleIndex() {
>>
>> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/sim/NumberModel.java
>> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/
>> src/main/java/opennlp/tools/coref/sim/NumberModel.java?
>> rev=1544904&r1=1544903&r2=1544904&view=diff
>> ============================================================
>> ==================
>> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
>> (original)
>> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
>> Sun Nov 24 03:08:54 2013
>> @@ -22,12 +22,16 @@ import java.io.IOException;
>> import java.util.ArrayList;
>> import java.util.Iterator;
>> import java.util.List;
>> -
>> -import opennlp.maxent.GIS;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> -import opennlp.model.Event;
>> -import opennlp.model.MaxentModel;
>> +import opennlp.tools.ml.maxent.GIS;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.GIS;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +//import opennlp.model.Event;
>> +import opennlp.tools.ml.model.Event;
>> +//import opennlp.model.MaxentModel;
>> +import opennlp.tools.ml.model.MaxentModel;
>> import opennlp.tools.coref.resolver.ResolverUtils;
>> import opennlp.tools.util.CollectionEventStream;
>> import opennlp.tools.util.HashList;
>>
>> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/sim/SimilarityModel.java
>> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/
>> src/main/java/opennlp/tools/coref/sim/SimilarityModel.
>> java?rev=1544904&r1=1544903&r2=1544904&view=diff
>> ============================================================
>> ==================
>> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
>> (original)
>> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
>> Sun Nov 24 03:08:54 2013
>> @@ -29,12 +29,17 @@ import java.util.Iterator;
>> import java.util.List;
>> import java.util.Map;
>> import java.util.Set;
>> -
>> -import opennlp.maxent.GIS;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> -import opennlp.model.Event;
>> -import opennlp.model.MaxentModel;
>> +import opennlp.tools.ml.maxent.GIS;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.GIS;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +import opennlp.tools.ml.model.Event;
>> +//import opennlp.model.MaxentModel;
>> +import opennlp.tools.ml.model.MaxentModel;
>> +//import opennlp.model.Event;
>> +//import opennlp.model.MaxentModel;
>> import opennlp.tools.coref.resolver.ResolverUtils;
>> import opennlp.tools.util.CollectionEventStream;
>> import opennlp.tools.util.HashList;
>>
>>
>>
>