You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@ctakes.apache.org by ja...@apache.org on 2017/03/24 16:03:01 UTC
svn commit: r1788492 - in /ctakes/trunk: ./ ctakes-assertion/
ctakes-chunker/ ctakes-core/
ctakes-core/src/main/java/org/apache/ctakes/core/ae/
ctakes-core/src/main/java/org/apache/ctakes/core/sentence/
ctakes-pos-tagger/ ctakes-relation-extractor/ ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/annotators/
Author: james-masanz
Date: Fri Mar 24 16:03:01 2017
New Revision: 1788492
URL: http://svn.apache.org/viewvc?rev=1788492&view=rev
Log:
First pass at updating OpenNLP to 1.7.2 (CTAKES-191).
Modified:
ctakes/trunk/ctakes-assertion/pom.xml
ctakes/trunk/ctakes-chunker/pom.xml
ctakes/trunk/ctakes-core/pom.xml
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java
ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorCtakes.java
ctakes/trunk/ctakes-pos-tagger/pom.xml
ctakes/trunk/ctakes-relation-extractor/pom.xml
ctakes/trunk/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/annotators/SentenceDetector.java
ctakes/trunk/pom.xml
Modified: ctakes/trunk/ctakes-assertion/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-assertion/pom.xml?rev=1788492&r1=1788491&r2=1788492&view=diff
==============================================================================
--- ctakes/trunk/ctakes-assertion/pom.xml (original)
+++ ctakes/trunk/ctakes-assertion/pom.xml Fri Mar 24 16:03:01 2017
@@ -123,11 +123,11 @@
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-maxent</artifactId>
+ <artifactId>opennlp-tools</artifactId>
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-tools</artifactId>
+ <artifactId>opennlp-maxent</artifactId>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
Modified: ctakes/trunk/ctakes-chunker/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-chunker/pom.xml?rev=1788492&r1=1788491&r2=1788492&view=diff
==============================================================================
--- ctakes/trunk/ctakes-chunker/pom.xml (original)
+++ ctakes/trunk/ctakes-chunker/pom.xml Fri Mar 24 16:03:01 2017
@@ -55,11 +55,11 @@
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-maxent</artifactId>
+ <artifactId>opennlp-tools</artifactId>
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-tools</artifactId>
+ <artifactId>opennlp-maxent</artifactId>
</dependency>
<dependency>
<groupId>junit</groupId>
Modified: ctakes/trunk/ctakes-core/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/pom.xml?rev=1788492&r1=1788491&r2=1788492&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/pom.xml (original)
+++ ctakes/trunk/ctakes-core/pom.xml Fri Mar 24 16:03:01 2017
@@ -74,11 +74,11 @@
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-maxent</artifactId>
+ <artifactId>opennlp-tools</artifactId>
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-tools</artifactId>
+ <artifactId>opennlp-maxent</artifactId>
</dependency>
<!-- Todo : is lucene necessary at this level? -->
<dependency>
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java?rev=1788492&r1=1788491&r2=1788492&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/ae/SentenceDetector.java Fri Mar 24 16:03:01 2017
@@ -20,6 +20,7 @@ package org.apache.ctakes.core.ae;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.sentdetect.*;
+import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.TrainingParameters;
@@ -98,8 +99,7 @@ public class SentenceDetector extends JC
sdmodel = new SentenceModel(is);
EndOfSentenceScannerImpl eoss = new EndOfSentenceScannerImpl();
DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eoss.getEndOfSentenceCharacters());
- sentenceDetector = new SentenceDetectorCtakes(
- sdmodel.getMaxentModel(), cg, eoss);
+ sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss);
skipSegmentsSet = new HashSet<>();
if(skipSegmentsArray != null){
@@ -289,10 +289,15 @@ public class SentenceDetector extends JC
Charset charset = Charset.forName("UTF-8");
- SentenceModel mod = null;
+ SentenceModel mod = null;
- try(FileInputStream inStream = new FileInputStream(inFile)){
- ObjectStream<String> lineStream = new PlainTextByLineStream(inStream, charset);
+
+
+ MarkableFileInputStreamFactory mfisf = new MarkableFileInputStreamFactory(inFile);
+ ObjectStream<String> lineStream = null;
+ try {
+
+ lineStream = new PlainTextByLineStream(mfisf, charset);
ObjectStream<SentenceSample> sampleStream = new SentenceSampleStream(lineStream);
// Training Parameters
@@ -310,6 +315,8 @@ public class SentenceDetector extends JC
} finally {
sampleStream.close();
}
+ } catch (IOException e) {
+ lineStream.close();
}
try(FileOutputStream outStream = new FileOutputStream(outFile)){
Modified: ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorCtakes.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorCtakes.java?rev=1788492&r1=1788491&r2=1788492&view=diff
==============================================================================
--- ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorCtakes.java (original)
+++ ctakes/trunk/ctakes-core/src/main/java/org/apache/ctakes/core/sentence/SentenceDetectorCtakes.java Fri Mar 24 16:03:01 2017
@@ -29,11 +29,12 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import opennlp.maxent.GIS;
-import opennlp.maxent.GISModel;
+import opennlp.tools.ml.maxent.GISTrainer;
+import opennlp.tools.ml.maxent.GISModel;
import opennlp.model.EventStream;
-import opennlp.model.MaxentModel;
+import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.dictionary.Dictionary;
+import opennlp.tools.sentdetect.DefaultSDContextGenerator;
import opennlp.tools.sentdetect.EndOfSentenceScanner;
import opennlp.tools.sentdetect.SDContextGenerator;
import opennlp.tools.sentdetect.SDEventStream;
@@ -42,7 +43,10 @@ import opennlp.tools.sentdetect.Sentence
import opennlp.tools.sentdetect.SentenceSample;
import opennlp.tools.sentdetect.SentenceSampleStream;
import opennlp.tools.sentdetect.lang.Factory;
-import opennlp.tools.util.HashSumEventStream;
+import opennlp.tools.ml.model.Event;
+import opennlp.tools.ml.model.HashSumEventStream;
+import opennlp.tools.util.AbstractObjectStream;
+import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
@@ -99,7 +103,7 @@ public class SentenceDetectorCtakes {
*
* @param model the {@link SentenceModel}
*/
- public SentenceDetectorCtakes(MaxentModel model, SDContextGenerator cg, EndOfSentenceScanner eoss) {
+ public SentenceDetectorCtakes(MaxentModel model, DefaultSDContextGenerator cg, EndOfSentenceScanner eoss) {
this.model = model;
cgen = cg;
scanner = eoss;
@@ -107,7 +111,7 @@ public class SentenceDetectorCtakes {
}
- /**
+ /**
* Detect sentences in a String.
*
* @param s The string to be processed.
@@ -244,18 +248,18 @@ public class SentenceDetectorCtakes {
Factory factory = new Factory();
// TODO: Fix the EventStream to throw exceptions when training goes wrong
- EventStream eventStream = new SDEventStream(samples,
+ SDEventStream eventStream = new SDEventStream(samples,
factory.createSentenceContextGenerator(languageCode),
factory.createEndOfSentenceScanner(languageCode));
- HashSumEventStream hses = new HashSumEventStream(eventStream);
- GISModel sentModel = GIS.trainModel(hses, iterations, cutoff);
+ HashSumEventStream hses = new HashSumEventStream(eventStream); // AbstractObjectStream<Event>
+ GISTrainer trainer = new GISTrainer();
+ MaxentModel sentModel = trainer.trainModel(hses, iterations, cutoff);
manifestInfoEntries.put(BaseModel.TRAINING_EVENTHASH_PROPERTY,
hses.calculateHashSum().toString(16));
- return new SentenceModel(languageCode, sentModel,
- useTokenEnd, abbreviations, manifestInfoEntries);
+ return new SentenceModel(languageCode, sentModel, useTokenEnd, abbreviations, manifestInfoEntries);
}
private static void usage() {
@@ -324,10 +328,14 @@ public class SentenceDetectorCtakes {
if ((lang == null) || (encoding == null)) {
usage();
}
-
- SentenceModel model = train(lang, new SentenceSampleStream(new PlainTextByLineStream(
- new InputStreamReader(new FileInputStream(inFile), encoding))), true, null, cutoff, iters);
+ MarkableFileInputStreamFactory mfisf = new MarkableFileInputStreamFactory(inFile);
+ ObjectStream<String> lineStream = null;
+ lineStream = new PlainTextByLineStream(mfisf, encoding);
+ ObjectStream<SentenceSample> sampleStream = new SentenceSampleStream(lineStream);
+
+ //new PlainTextByLineStream(new InputStreamReader(new FileInputStream(inFile), encoding))
+ SentenceModel model = train(lang, sampleStream, true, null, cutoff, iters);
// TODO: add support for iterations and cutoff settings
Modified: ctakes/trunk/ctakes-pos-tagger/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-pos-tagger/pom.xml?rev=1788492&r1=1788491&r2=1788492&view=diff
==============================================================================
--- ctakes/trunk/ctakes-pos-tagger/pom.xml (original)
+++ ctakes/trunk/ctakes-pos-tagger/pom.xml Fri Mar 24 16:03:01 2017
@@ -51,11 +51,11 @@
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-maxent</artifactId>
+ <artifactId>opennlp-tools</artifactId>
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-tools</artifactId>
+ <artifactId>opennlp-maxent</artifactId>
</dependency>
<dependency>
<groupId>com.googlecode.clearnlp</groupId>
Modified: ctakes/trunk/ctakes-relation-extractor/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-relation-extractor/pom.xml?rev=1788492&r1=1788491&r2=1788492&view=diff
==============================================================================
--- ctakes/trunk/ctakes-relation-extractor/pom.xml (original)
+++ ctakes/trunk/ctakes-relation-extractor/pom.xml Fri Mar 24 16:03:01 2017
@@ -107,11 +107,11 @@
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-maxent</artifactId>
+ <artifactId>opennlp-tools</artifactId>
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-tools</artifactId>
+ <artifactId>opennlp-maxent</artifactId>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
Modified: ctakes/trunk/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/annotators/SentenceDetector.java
URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/annotators/SentenceDetector.java?rev=1788492&r1=1788491&r2=1788492&view=diff
==============================================================================
--- ctakes/trunk/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/annotators/SentenceDetector.java (original)
+++ ctakes/trunk/ctakes-ytex-uima/src/main/java/org/apache/ctakes/ytex/uima/annotators/SentenceDetector.java Fri Mar 24 16:03:01 2017
@@ -169,8 +169,7 @@ public class SentenceDetector extends JC
char[] eosc = eoss.getEndOfSentenceCharacters();
// SentenceDContextGenerator cg = new SentenceDContextGenerator();
DefaultSDContextGenerator cg = new DefaultSDContextGenerator(eosc);
- sentenceDetector = new SentenceDetectorCtakes(
- sdmodel.getMaxentModel(), cg, eoss);
+ sentenceDetector = new SentenceDetectorCtakes(sdmodel.getMaxentModel(), cg, eoss);
skipSegmentsSet = ParamUtil.getStringParameterValuesSet(
PARAM_SEGMENTS_TO_SKIP, context);
Modified: ctakes/trunk/pom.xml
URL: http://svn.apache.org/viewvc/ctakes/trunk/pom.xml?rev=1788492&r1=1788491&r2=1788492&view=diff
==============================================================================
--- ctakes/trunk/pom.xml (original)
+++ ctakes/trunk/pom.xml Fri Mar 24 16:03:01 2017
@@ -483,20 +483,20 @@
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-maxent</artifactId>
- <version>3.0.3</version>
- </dependency>
- <dependency>
- <groupId>org.apache.opennlp</groupId>
<artifactId>opennlp-tools</artifactId>
- <version>1.5.3</version>
+ <version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.opennlp</groupId>
<artifactId>opennlp-uima</artifactId>
- <version>1.5.3</version>
+ <version>1.7.2</version>
</dependency>
<dependency>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-maxent</artifactId>
+ <version>3.0.3</version>
+ </dependency>
+ <dependency>
<groupId>org.apache.uima</groupId>
<artifactId>uimaj-examples</artifactId>
<version>2.9.0</version>