You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2023/01/22 06:04:19 UTC
[opennlp-sandbox] branch master updated: updates sandbox component 'modelbuilder-addon' to be compatible with latest opennlp-tools release (#62)
This is an automated email from the ASF dual-hosted git repository.
mawiesne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp-sandbox.git
The following commit(s) were added to refs/heads/master by this push:
new cd95da1 updates sandbox component 'modelbuilder-addon' to be compatible with latest opennlp-tools release (#62)
cd95da1 is described below
commit cd95da14281f33884164ba34c6fd6f19553edee1
Author: Martin Wiesner <ma...@users.noreply.github.com>
AuthorDate: Sun Jan 22 07:04:15 2023 +0100
updates sandbox component 'modelbuilder-addon' to be compatible with latest opennlp-tools release (#62)
- adjusts opennlp-tools to 2.1.0
- adjusts parent project (org.apache:apache) to version 18
- adjusts Java language level to 11
- improves resource handling of streams
- removes funny pseudo JUnit-test which was effectively doing nothing
- removes unused imports
---
modelbuilder-addon/pom.xml | 61 +++++++++++++++++-----
.../modelbuilder/DefaultModelBuilderUtil.java | 20 ++++---
.../addons/modelbuilder/KnownEntityProvider.java | 35 ++++++-------
.../modelbuilder/ModelGenerationValidator.java | 5 +-
.../addons/modelbuilder/ModelParameter.java | 4 --
.../opennlp/addons/modelbuilder/Modelable.java | 7 +--
.../modelbuilder/SemiSupervisedModelGenerator.java | 4 --
.../addons/modelbuilder/SentenceProvider.java | 3 --
.../modelbuilder/impls/BaseModelBuilderParams.java | 1 -
.../impls/FileKnownEntityProvider.java | 19 +++----
.../modelbuilder/impls/FileModelValidatorImpl.java | 12 ++---
.../modelbuilder/impls/FileSentenceProvider.java | 10 ++--
.../modelbuilder/impls/GenericModelGenerator.java | 3 +-
.../modelbuilder/impls/GenericModelableImpl.java | 55 +++++++++----------
.../src/test/java/modelbuilder/AppTest.java | 38 --------------
15 files changed, 121 insertions(+), 156 deletions(-)
diff --git a/modelbuilder-addon/pom.xml b/modelbuilder-addon/pom.xml
index 4a9c886..6096303 100644
--- a/modelbuilder-addon/pom.xml
+++ b/modelbuilder-addon/pom.xml
@@ -1,35 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp</artifactId>
- <version>1.6.0-SNAPSHOT</version>
- <relativePath>../opennlp/pom.xml</relativePath>
+ <parent>
+ <groupId>org.apache</groupId>
+ <artifactId>apache</artifactId>
+ <!-- TODO OPENNLP-1452 once this is resolved, move to 29 as well. -->
+ <version>18</version>
+ <relativePath />
</parent>
<artifactId>modelbuilder-addon</artifactId>
- <version>1.0-SNAPSHOT</version>
+ <version>2.1.1-SNAPSHOT</version>
<packaging>jar</packaging>
- <name>modelbuilder-addon</name>
- <url>http://maven.apache.org</url>
+ <name>Apache OpenNLP ModelBuilder Addon</name>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
+ <dependency>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-tools</artifactId>
+ <version>2.1.0</version>
+ </dependency>
+
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>3.8.1</version>
+ <version>4.13.2</version>
<scope>test</scope>
</dependency>
- <dependency>
- <groupId>org.apache.opennlp</groupId>
- <artifactId>opennlp-tools</artifactId>
- <version>1.6.0-SNAPSHOT</version>
- </dependency>
</dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <configuration>
+ <source>11</source>
+ <target>11</target>
+ <compilerArgument>-Xlint</compilerArgument>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
</project>
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
index 81ff9fd..6fe5937 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/DefaultModelBuilderUtil.java
@@ -16,6 +16,7 @@
package opennlp.addons.modelbuilder;
import java.io.File;
+
import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
import opennlp.addons.modelbuilder.impls.FileKnownEntityProvider;
import opennlp.addons.modelbuilder.impls.FileModelValidatorImpl;
@@ -24,17 +25,14 @@ import opennlp.addons.modelbuilder.impls.GenericModelGenerator;
import opennlp.addons.modelbuilder.impls.GenericModelableImpl;
/**
- *
- * Utilizes the filebased implementations to produce an NER model from user
+ * Utilizes the file-based implementations to produce an NER model from user-defined data.
* The basic processing is such
* read in the list of known entities
* annotate the sentences based on the list of known entities
* create a model from the annotations
* perform NER with the model on the sentences
* add the NER results to the annotations
- * rebuild the model
- * loop
- * defined data
+ * rebuild the model, then loop.
*/
public class DefaultModelBuilderUtil {
@@ -74,20 +72,20 @@ public class DefaultModelBuilderUtil {
params.setKnownEntitiesFile(knownEntities);
params.setModelFile(modelOutFile);
params.setKnownEntityBlacklist(knownEntitiesBlacklist);
- /**
+ /*
* sentence providers feed this process with user data derived sentences
* this impl just reads line by line through a file
*/
SentenceProvider sentenceProvider = new FileSentenceProvider();
sentenceProvider.setParameters(params);
- /**
+ /*
* KnownEntityProviders provide a seed list of known entities... such as
* Barack Obama for person, or Germany for location obviously these would
* want to be prolific, non ambiguous names
*/
KnownEntityProvider knownEntityProvider = new FileKnownEntityProvider();
knownEntityProvider.setParameters(params);
- /**
+ /*
* ModelGenerationValidators try to weed out bad hits by the iterations of
* the name finder. Since this is a recursive process, with each iteration
* the namefinder will get more and more greedy if bad entities are allowed
@@ -98,17 +96,17 @@ public class DefaultModelBuilderUtil {
*/
ModelGenerationValidator validator = new FileModelValidatorImpl();
validator.setParameters(params);
- /**
+ /*
* Modelable's write and read the annotated sentences, as well as create and
* write the NER models
*/
Modelable modelable = new GenericModelableImpl();
modelable.setParameters(params);
- /**
+ /*
* the modelGenerator actually runs the process with a set number of
* iterations... could be better by actually calculating the diff between
- * runs and stopping based on a thresh, but for extrememly large sentence
+ * runs and stopping based on a threshold, but for extremely large sentence
* sets this may be too much.
*/
modelGenerator.build(sentenceProvider, knownEntityProvider, validator, modelable, iterations);
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
index 694250e..fa2a00e 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/KnownEntityProvider.java
@@ -17,29 +17,26 @@ package opennlp.addons.modelbuilder;
import java.util.Set;
-
-
/**
- *
-Supplies a list of known entities (a list of names or locations)
+ * Supplies a list of known entities (a list of names or locations)
*/
-public interface KnownEntityProvider extends ModelParameter{
+public interface KnownEntityProvider extends ModelParameter {
/**
- * returns a list of known non ambiguous entities.
- * @return a set of entities
- */
+ * Returns a set of known, unambiguous entities.
+ * @return a set of entities
+ */
Set<String> getKnownEntities();
-/**
- * adds to the set of known entities. Overriding classes should hold this list in a class level set.
- * @param unambiguousEntity
- */
+
+ /**
+ * Adds to the set of known entities. Implementing classes should hold these entities in a class-level set.
+ * @param unambiguousEntity the unambiguous entity to add
+ */
void addKnownEntity(String unambiguousEntity);
-/**
- * defines the type of entity that the set contains, ie person, location, organization.
- * @return
- */
+
+ /**
+ * Defines the type of entity that the set contains, e.g. person, location, or organization.
+ * @return the entity type
+ */
String getKnownEntitiesType();
-
-
-
+
}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
index 4bd5fe2..e8e8f7e 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelGenerationValidator.java
@@ -18,16 +18,13 @@ package opennlp.addons.modelbuilder;
import java.util.Collection;
/**
- *
-Validates results from the iterative namefinding
+ * Validates results from the iterative name finding.
*/
public interface ModelGenerationValidator extends ModelParameter {
Boolean validSentence(String sentence);
Boolean validNamedEntity(String namedEntity);
-
-
Collection<String> getBlackList();
}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java
index 136e775..e2e8649 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/ModelParameter.java
@@ -17,12 +17,8 @@ package opennlp.addons.modelbuilder;
import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
-/**
- *
- */
public interface ModelParameter<T extends BaseModelBuilderParams>{
void setParameters(T params);
-
}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java
index 80b0170..7c8f6a4 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/Modelable.java
@@ -16,14 +16,12 @@
package opennlp.addons.modelbuilder;
import java.util.Set;
-import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.namefind.TokenNameFinderModel;
/**
*
*/
-public interface Modelable extends ModelParameter{
-
-
+public interface Modelable extends ModelParameter {
String annotate(String sentence, String namedEntity, String entityType);
@@ -40,6 +38,5 @@ public interface Modelable extends ModelParameter{
TokenNameFinderModel getModel();
String[] tokenizeSentenceToWords(String sentence);
-
}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java
index c97a4c1..22807c9 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SemiSupervisedModelGenerator.java
@@ -17,10 +17,6 @@ package opennlp.addons.modelbuilder;
import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
-/**
- *
-
- */
public interface SemiSupervisedModelGenerator extends ModelParameter<BaseModelBuilderParams> {
void build(SentenceProvider sentenceProvider, KnownEntityProvider knownEntityProvider,
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java
index 5610224..1c655ad 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/SentenceProvider.java
@@ -18,9 +18,6 @@ package opennlp.addons.modelbuilder;
import java.util.Set;
import opennlp.addons.modelbuilder.impls.BaseModelBuilderParams;
-/**
- *
- */
public interface SentenceProvider extends ModelParameter<BaseModelBuilderParams> {
Set<String> getSentences();
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
index fcb2384..6173acc 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/BaseModelBuilderParams.java
@@ -19,7 +19,6 @@ import java.io.File;
import java.util.Map;
/**
- *
* Used to pass params through the processing
*/
public class BaseModelBuilderParams {
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
index 0de043c..841f6db 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileKnownEntityProvider.java
@@ -17,24 +17,22 @@ package opennlp.addons.modelbuilder.impls;
import java.io.BufferedReader;
import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
+
import opennlp.addons.modelbuilder.KnownEntityProvider;
-/**
- *
- */
public class FileKnownEntityProvider implements KnownEntityProvider {
- Set<String> knownEntities = new HashSet<String>();
+ final Set<String> knownEntities = new HashSet<>();
BaseModelBuilderParams params;
+
@Override
public Set<String> getKnownEntities() {
if (knownEntities.isEmpty()) {
@@ -44,7 +42,7 @@ public class FileKnownEntityProvider implements KnownEntityProvider {
String line;
fis = new FileInputStream(params.getKnownEntitiesFile());
- br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8")));
+ br = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8));
while ((line = br.readLine()) != null) {
knownEntities.add(line);
}
@@ -53,8 +51,6 @@ public class FileKnownEntityProvider implements KnownEntityProvider {
br.close();
br = null;
fis = null;
- } catch (FileNotFoundException ex) {
- Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
}
@@ -69,14 +65,11 @@ public class FileKnownEntityProvider implements KnownEntityProvider {
@Override
public String getKnownEntitiesType() {
-
return params.getEntityType();
}
-
-
@Override
- public void setParameters(BaseModelBuilderParams params) {
+ public void setParameters(BaseModelBuilderParams params) {
this.params = params;
}
}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
index ea4bb05..8bc4954 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileModelValidatorImpl.java
@@ -17,16 +17,16 @@ package opennlp.addons.modelbuilder.impls;
import java.io.BufferedReader;
import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
+
import opennlp.addons.modelbuilder.ModelGenerationValidator;
/**
@@ -34,7 +34,7 @@ import opennlp.addons.modelbuilder.ModelGenerationValidator;
*/
public class FileModelValidatorImpl implements ModelGenerationValidator {
- private Set<String> badentities = new HashSet<String>();
+ private final Set<String> badentities = new HashSet<>();
BaseModelBuilderParams params;
@Override
@@ -59,7 +59,7 @@ public class FileModelValidatorImpl implements ModelGenerationValidator {
// if (p.matcher(namedEntity).find()) {
// return false;
// }
- Boolean b = true;
+ boolean b = true;
if (badentities.contains(namedEntity.toLowerCase())) {
b = false;
}
@@ -78,15 +78,13 @@ public class FileModelValidatorImpl implements ModelGenerationValidator {
String line;
fis = new FileInputStream(params.getKnownEntityBlacklist());
- br = new BufferedReader(new InputStreamReader(fis, Charset.forName("UTF-8")));
+ br = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8));
while ((line = br.readLine()) != null) {
badentities.add(line);
}
br.close();
br = null;
fis = null;
- } catch (FileNotFoundException ex) {
- Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
index bea55f5..bf6fe6f 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/FileSentenceProvider.java
@@ -17,7 +17,6 @@ package opennlp.addons.modelbuilder.impls;
import java.io.BufferedReader;
import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
@@ -26,6 +25,7 @@ import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
+
import opennlp.addons.modelbuilder.SentenceProvider;
/**
@@ -33,9 +33,10 @@ import opennlp.addons.modelbuilder.SentenceProvider;
*/
public class FileSentenceProvider implements SentenceProvider {
+ private final Set<String> sentences = new HashSet<>();
BaseModelBuilderParams params ;
- Set<String> sentences = new HashSet<String>();
+ @Override
public Set<String> getSentences() {
if (sentences.isEmpty()) {
try {
@@ -55,8 +56,6 @@ public class FileSentenceProvider implements SentenceProvider {
br.close();
br = null;
fis = null;
- } catch (FileNotFoundException ex) {
- Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(FileKnownEntityProvider.class.getName()).log(Level.SEVERE, null, ex);
}
@@ -64,7 +63,8 @@ public class FileSentenceProvider implements SentenceProvider {
return sentences;
}
- public void setParameters(BaseModelBuilderParams params) {
+ @Override
+ public void setParameters(BaseModelBuilderParams params) {
this.params = params;
}
}
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
index bbd23e1..8b11dac 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelGenerator.java
@@ -17,6 +17,7 @@ package opennlp.addons.modelbuilder.impls;
import java.util.HashMap;
import java.util.Map;
+
import opennlp.addons.modelbuilder.KnownEntityProvider;
import opennlp.addons.modelbuilder.ModelGenerationValidator;
import opennlp.addons.modelbuilder.Modelable;
@@ -31,7 +32,7 @@ import opennlp.tools.util.Span;
*/
public class GenericModelGenerator implements SemiSupervisedModelGenerator {
- private Map<String, String> params = new HashMap<String, String>();
+ private Map<String, String> params = new HashMap<>();
@Override
public void setParameters(BaseModelBuilderParams params) {
diff --git a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
index 572e84b..caa6ea8 100644
--- a/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
+++ b/modelbuilder-addon/src/main/java/opennlp/addons/modelbuilder/impls/GenericModelableImpl.java
@@ -16,30 +16,35 @@
package opennlp.addons.modelbuilder.impls;
import java.io.BufferedOutputStream;
-import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
-import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
+
import opennlp.addons.modelbuilder.Modelable;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.namefind.NameSampleDataStream;
+import opennlp.tools.namefind.TokenNameFinderFactory;
import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.TrainingParameters;
/**
- * Creates annotations, writes annotations to file, and creates a model and writes to a file
+ * Creates annotations, writes annotations to file, and creates a model and writes to a file.
*/
public class GenericModelableImpl implements Modelable {
- private Set<String> annotatedSentences = new HashSet<String>();
+ private Set<String> annotatedSentences = new HashSet<>();
BaseModelBuilderParams params;
@Override
@@ -49,20 +54,15 @@ public class GenericModelableImpl implements Modelable {
@Override
public String annotate(String sentence, String namedEntity, String entityType) {
- String annotation = sentence.replace(namedEntity, " <START:" + entityType + "> " + namedEntity + " <END> ");
- return annotation;
+ return sentence.replace(namedEntity, " <START:" + entityType + "> " + namedEntity + " <END> ");
}
@Override
public void writeAnnotatedSentences() {
- try {
-
- FileWriter writer = new FileWriter(params.getAnnotatedTrainingDataFile(), false);
-
+ try (FileWriter writer = new FileWriter(params.getAnnotatedTrainingDataFile(), false)) {
for (String s : annotatedSentences) {
writer.write(s.replace("\n", " ").trim() + "\n");
}
- writer.close();
} catch (IOException ex) {
ex.printStackTrace();
}
@@ -85,34 +85,36 @@ public class GenericModelableImpl implements Modelable {
@Override
public void buildModel(String entityType) {
+ final InputStreamFactory factory;
try {
+ factory = new MarkableFileInputStreamFactory(params.getAnnotatedTrainingDataFile());
+ } catch (FileNotFoundException e) {
+ throw new RuntimeException("Error finding and reading the training data file!", e);
+ }
+
+ final TrainingParameters trainParams = TrainingParameters.defaultParams();
+
+ TokenNameFinderModel model;
+ try (ObjectStream<NameSample> samples =
+ new NameSampleDataStream(new PlainTextByLineStream(factory, StandardCharsets.UTF_8));
+ OutputStream modelOut = new BufferedOutputStream(new FileOutputStream(params.getModelFile()))) {
+
System.out.println("\tBuilding Model using " + annotatedSentences.size() + " annotations");
System.out.println("\t\treading training data...");
- Charset charset = Charset.forName("UTF-8");
- ObjectStream<String> lineStream =
- new PlainTextByLineStream(new FileInputStream(params.getAnnotatedTrainingDataFile()), charset);
- ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream);
-
- TokenNameFinderModel model;
- model = NameFinderME.train("en", entityType, sampleStream, null);
- sampleStream.close();
- OutputStream modelOut = new BufferedOutputStream(new FileOutputStream(params.getModelFile()));
+ model = NameFinderME.train("en", entityType, samples, trainParams, new TokenNameFinderFactory());
model.serialize(modelOut);
- if (modelOut != null) {
- modelOut.close();
- }
+
System.out.println("\tmodel generated");
} catch (Exception e) {
+ throw new RuntimeException("Error building model! " + e.getLocalizedMessage(), e);
}
}
@Override
public TokenNameFinderModel getModel() {
-
-
TokenNameFinderModel nerModel = null;
try {
- nerModel = new TokenNameFinderModel(new FileInputStream(params.getModelFile()));
+ nerModel = new TokenNameFinderModel(params.getModelFile());
} catch (IOException ex) {
Logger.getLogger(GenericModelableImpl.class.getName()).log(Level.SEVERE, null, ex);
}
@@ -122,6 +124,5 @@ public class GenericModelableImpl implements Modelable {
@Override
public String[] tokenizeSentenceToWords(String sentence) {
return sentence.split(" ");
-
}
}
diff --git a/modelbuilder-addon/src/test/java/modelbuilder/AppTest.java b/modelbuilder-addon/src/test/java/modelbuilder/AppTest.java
deleted file mode 100644
index 2b04731..0000000
--- a/modelbuilder-addon/src/test/java/modelbuilder/AppTest.java
+++ /dev/null
@@ -1,38 +0,0 @@
-package modelbuilder;
-
-import junit.framework.Test;
-import junit.framework.TestCase;
-import junit.framework.TestSuite;
-
-/**
- * Unit test for simple App.
- */
-public class AppTest
- extends TestCase
-{
- /**
- * Create the test case
- *
- * @param testName name of the test case
- */
- public AppTest( String testName )
- {
- super( testName );
- }
-
- /**
- * @return the suite of tests being tested
- */
- public static Test suite()
- {
- return new TestSuite( AppTest.class );
- }
-
- /**
- * Rigourous Test :-)
- */
- public void testApp()
- {
- assertTrue( true );
- }
-}