You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@opennlp.apache.org by Mark G <gi...@gmail.com> on 2013/12/04 12:49:17 UTC

Re: svn commit: r1544904 - in /opennlp/sandbox/opennlp-coref: ./ src/main/java/opennlp/tools/coref/ src/main/java/opennlp/tools/coref/resolver/ src/main/java/opennlp/tools/coref/sim/

I have a lot of data lying around. How do I train the coref component on it?


On Mon, Nov 25, 2013 at 3:02 PM, Jörn Kottmann <ko...@gmail.com> wrote:

> Actually that code should have compiled just fine against maxent 3.0.3.
>
> Anyway, the reason for the separation from opennlp-tools is that we need
> to first build/finish the tooling
> to train the coref component. In my opinion this will be easier if we just
> let the code continue to use the old
> maxent library. After that is accomplished we could start updating and
> refactoring it and re-integrate it into opennlp-tools.
>
> Do you have some data sets you could train it on? I am happy to provide
> assistance and point out issues I encountered.
>
> Jörn
>
>
> On 11/24/2013 04:08 AM, markg@apache.org wrote:
>
>> Author: markg
>> Date: Sun Nov 24 03:08:54 2013
>> New Revision: 1544904
>>
>> URL: http://svn.apache.org/r1544904
>> Log:
>> OPENNLP-621
>> Fixed errors and changed all appropriate imports to opennlp.tools.ml.
>> Builds but no testing done yet.
>>
>> Modified:
>>      opennlp/sandbox/opennlp-coref/   (props changed)
>>      opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/CorefModel.java
>>      opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/DefaultNonReferentialResolver.java
>>      opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/MaxentResolver.java
>>      opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/sim/GenderModel.java
>>      opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/sim/NumberModel.java
>>      opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/sim/SimilarityModel.java
>>
>> Propchange: opennlp/sandbox/opennlp-coref/
>> ------------------------------------------------------------
>> ------------------
>> --- svn:ignore (added)
>> +++ svn:ignore Sun Nov 24 03:08:54 2013
>> @@ -0,0 +1 @@
>> +target
>>
>> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/CorefModel.java
>> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/
>> src/main/java/opennlp/tools/coref/CorefModel.java?rev=
>> 1544904&r1=1544903&r2=1544904&view=diff
>> ============================================================
>> ==================
>> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/CorefModel.java
>> (original)
>> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/CorefModel.java
>> Sun Nov 24 03:08:54 2013
>> @@ -26,9 +26,10 @@ import java.io.FileOutputStream;
>>   import java.io.FileReader;
>>   import java.io.IOException;
>>   import java.util.zip.GZIPInputStream;
>> -
>> -import opennlp.maxent.io.BinaryGISModelReader;
>> -import opennlp.model.AbstractModel;
>> +import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
>> +//import opennlp.maxent.io.BinaryGISModelReader;
>> +//import opennlp.model.AbstractModel;
>> +import opennlp.tools.ml.model.AbstractModel;
>>   import opennlp.tools.dictionary.Dictionary;
>>   import opennlp.tools.util.StringList;
>>   import opennlp.tools.util.model.BaseModel;
>>
>> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/DefaultNonReferentialResolver.java
>> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/
>> src/main/java/opennlp/tools/coref/resolver/DefaultNonReferentialResolver.
>> java?rev=1544904&r1=1544903&r2=1544904&view=diff
>> ============================================================
>> ==================
>> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/DefaultNonReferentialResolver.java (original)
>> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/DefaultNonReferentialResolver.java Sun Nov 24 03:08:54
>> 2013
>> @@ -25,14 +25,26 @@ import java.util.ArrayList;
>>   import java.util.Iterator;
>>   import java.util.List;
>>   -import opennlp.maxent.GIS;
>> -import opennlp.maxent.io.BinaryGISModelReader;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> -import opennlp.model.Event;
>> -import opennlp.model.MaxentModel;
>> +//import opennlp.maxent.GIS;
>> +//import opennlp.maxent.io.BinaryGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +//import opennlp.maxent.GIS;
>> +import opennlp.tools.ml.maxent.io.BinaryGISModelReader;
>> +import opennlp.tools.ml.maxent.GIS;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +//import opennlp.model.Event;
>> +import opennlp.tools.ml.model.MaxentModel;
>> +//import opennlp.model.MaxentModel;
>> +
>> +import opennlp.tools.ml.model.EventStream;
>> +//import opennlp.model.MaxentModel;
>>   import opennlp.tools.coref.mention.MentionContext;
>>   import opennlp.tools.coref.mention.Parse;
>> +import opennlp.tools.ml.model.Event;
>>   import opennlp.tools.util.CollectionEventStream;
>>     /**
>> @@ -124,7 +136,7 @@ public class DefaultNonReferentialResolv
>>           }
>>           writer.close();
>>         }
>> -      (new SuffixSensitiveGISModelWriter(GIS.trainModel(new
>> CollectionEventStream(events),100,10),new File(modelName+modelExtension)
>> )).persist();
>> +      (new SuffixSensitiveGISModelWriter(GIS.trainModel((EventStream)new
>> CollectionEventStream(events),100,10),new File(modelName+modelExtension)
>> )).persist();
>>       }
>>     }
>>   }
>>
>> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/MaxentResolver.java
>> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/
>> src/main/java/opennlp/tools/coref/resolver/MaxentResolver.
>> java?rev=1544904&r1=1544903&r2=1544904&view=diff
>> ============================================================
>> ==================
>> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/MaxentResolver.java (original)
>> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/resolver/MaxentResolver.java Sun Nov 24 03:08:54 2013
>> @@ -24,15 +24,28 @@ import java.util.ArrayList;
>>   import java.util.Iterator;
>>   import java.util.List;
>>   -import opennlp.maxent.GIS;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> -import opennlp.model.Event;
>> -import opennlp.model.MaxentModel;
>> +//import opennlp.maxent.GIS;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +//import opennlp.model.EventStream;
>> +//import opennlp.model.MaxentModel;
>> +
>> +
>> +import opennlp.tools.ml.maxent.GIS;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.GIS;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +//import opennlp.model.Event;
>> +//import opennlp.model.MaxentModel;
>> +import opennlp.tools.ml.model.MaxentModel;
>> +import opennlp.tools.ml.model.EventStream;
>>   import opennlp.tools.coref.DiscourseEntity;
>>   import opennlp.tools.coref.DiscourseModel;
>>   import opennlp.tools.coref.mention.MentionContext;
>>   import opennlp.tools.coref.sim.TestSimilarityModel;
>> +import opennlp.tools.ml.model.Event;
>>   import opennlp.tools.util.CollectionEventStream;
>>     /**
>> @@ -55,7 +68,7 @@ public abstract class MaxentResolver ext
>>     private double[] candProbs;
>>     private int sameIndex;
>>     private ResolverMode mode;
>> -  private List<Event> events;
>> +  private List<opennlp.tools.ml.model.Event> events;
>>       /** When true, this designates that the resolver should use the
>> first referent encountered which it
>>      * more preferable than non-reference.  When false all non-excluded
>> referents within this resolvers range
>> @@ -314,7 +327,7 @@ public abstract class MaxentResolver ext
>>           }
>>           writer.close();
>>         }
>> -      (new SuffixSensitiveGISModelWriter(GIS.trainModel(new
>> CollectionEventStream(events),100,10),new File(modelName+modelExtension)
>> )).persist();
>> +      (new SuffixSensitiveGISModelWriter(GIS.trainModel((EventStream)new
>> CollectionEventStream(events),100,10),new File(modelName+modelExtension)
>> )).persist();
>>         nonReferentialResolver.train();
>>       }
>>     }
>>
>> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/sim/GenderModel.java
>> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/
>> src/main/java/opennlp/tools/coref/sim/GenderModel.java?
>> rev=1544904&r1=1544903&r2=1544904&view=diff
>> ============================================================
>> ==================
>> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
>> (original)
>> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/GenderModel.java
>> Sun Nov 24 03:08:54 2013
>> @@ -25,17 +25,26 @@ import java.io.FileWriter;
>>   import java.io.IOException;
>>   import java.io.InputStreamReader;
>>   import java.util.ArrayList;
>> +import java.util.Collection;
>>   import java.util.HashSet;
>>   import java.util.Iterator;
>>   import java.util.List;
>>   import java.util.Set;
>>   -import opennlp.maxent.GIS;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> -import opennlp.model.Event;
>> -import opennlp.model.MaxentModel;
>> +//import opennlp.maxent.GIS;
>> +import opennlp.tools.ml.maxent.GIS;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +//import opennlp.model.Event;
>> +import opennlp.tools.ml.model.Event;
>> +import opennlp.tools.ml.model.MaxentModel;
>> +//import opennlp.model.MaxentModel;
>>   import opennlp.tools.coref.resolver.ResolverUtils;
>> +import opennlp.tools.ml.model.AbstractModel;
>> +
>> +import opennlp.tools.ml.model.EventStream;
>>   import opennlp.tools.util.CollectionEventStream;
>>   import opennlp.tools.util.HashList;
>>   @@ -51,7 +60,7 @@ public class GenderModel implements Test
>>     private String modelName;
>>     private String modelExtension = ".bin.gz";
>>     private MaxentModel testModel;
>> -  private List<Event> events;
>> +  private Collection<Event> events;
>>     private boolean debugOn = true;
>>       private Set<String> maleNames;
>> @@ -267,9 +276,8 @@ public class GenderModel implements Test
>>         writer.close();
>>       }
>>       new SuffixSensitiveGISModelWriter(
>> -        GIS.trainModel(
>> -        new CollectionEventStream(events), true),
>> -        new File(modelName+modelExtension)).persist();
>> +           // GIS.trainModel((EventStream)new
>> CollectionEventStream(events), true)).persist();
>> +            (AbstractModel) GIS.trainModel((EventStream)new
>> CollectionEventStream(events), true), new File(modelName+modelExtension)
>> ).persist();
>>     }
>>       public int getFemaleIndex() {
>>
>> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/sim/NumberModel.java
>> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/
>> src/main/java/opennlp/tools/coref/sim/NumberModel.java?
>> rev=1544904&r1=1544903&r2=1544904&view=diff
>> ============================================================
>> ==================
>> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
>> (original)
>> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/NumberModel.java
>> Sun Nov 24 03:08:54 2013
>> @@ -22,12 +22,16 @@ import java.io.IOException;
>>   import java.util.ArrayList;
>>   import java.util.Iterator;
>>   import java.util.List;
>> -
>> -import opennlp.maxent.GIS;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> -import opennlp.model.Event;
>> -import opennlp.model.MaxentModel;
>> +import opennlp.tools.ml.maxent.GIS;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.GIS;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +//import opennlp.model.Event;
>> +import opennlp.tools.ml.model.Event;
>> +//import opennlp.model.MaxentModel;
>> +import opennlp.tools.ml.model.MaxentModel;
>>   import opennlp.tools.coref.resolver.ResolverUtils;
>>   import opennlp.tools.util.CollectionEventStream;
>>   import opennlp.tools.util.HashList;
>>
>> Modified: opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/
>> coref/sim/SimilarityModel.java
>> URL: http://svn.apache.org/viewvc/opennlp/sandbox/opennlp-coref/
>> src/main/java/opennlp/tools/coref/sim/SimilarityModel.
>> java?rev=1544904&r1=1544903&r2=1544904&view=diff
>> ============================================================
>> ==================
>> --- opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
>> (original)
>> +++ opennlp/sandbox/opennlp-coref/src/main/java/opennlp/tools/coref/sim/SimilarityModel.java
>> Sun Nov 24 03:08:54 2013
>> @@ -29,12 +29,17 @@ import java.util.Iterator;
>>   import java.util.List;
>>   import java.util.Map;
>>   import java.util.Set;
>> -
>> -import opennlp.maxent.GIS;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> -import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> -import opennlp.model.Event;
>> -import opennlp.model.MaxentModel;
>> +import opennlp.tools.ml.maxent.GIS;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelWriter;
>> +import opennlp.tools.ml.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.GIS;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelReader;
>> +//import opennlp.maxent.io.SuffixSensitiveGISModelWriter;
>> +import opennlp.tools.ml.model.Event;
>> +//import opennlp.model.MaxentModel;
>> +import opennlp.tools.ml.model.MaxentModel;
>> +//import opennlp.model.Event;
>> +//import opennlp.model.MaxentModel;
>>   import opennlp.tools.coref.resolver.ResolverUtils;
>>   import opennlp.tools.util.CollectionEventStream;
>>   import opennlp.tools.util.HashList;
>>
>>
>>
>