You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@streampipes.apache.org by ze...@apache.org on 2020/05/02 15:16:15 UTC
[incubator-streampipes-extensions] branch dev updated: OpenNLP models can now be uploaded into processors
This is an automated email from the ASF dual-hosted git repository.
zehnder pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-streampipes-extensions.git
The following commit(s) were added to refs/heads/dev by this push:
new 76a00f2 OpenNLP models can now be uploaded into processors
76a00f2 is described below
commit 76a00f28ce41416ab2db437404375afe93c6275d
Author: Philipp Zehnder <ze...@fzi.de>
AuthorDate: Sat May 2 17:11:36 2020 +0200
OpenNLP models can now be uploaded into processors
---
.../textmining/jvm/processor/chunker/Chunker.java | 26 +++++++++++++++------
.../jvm/processor/chunker/ChunkerController.java | 13 ++++++++++-
.../jvm/processor/chunker/ChunkerParameters.java | 8 ++++++-
.../jvm/processor/language/LanguageDetection.java | 22 +++++++++++------
.../language/LanguageDetectionController.java | 15 ++++++++++--
.../language/LanguageDetectionParameters.java | 8 ++++++-
.../processor/namefinder/NameFinderController.java | 1 +
.../processor/namefinder/NameFinderParameters.java | 1 +
.../jvm/processor/partofspeech/PartOfSpeech.java | 22 +++++++++++------
.../partofspeech/PartOfSpeechController.java | 15 ++++++++++--
.../partofspeech/PartOfSpeechParameters.java | 9 ++++++-
.../sentencedetection/SentenceDetection.java | 21 +++++++++++------
.../SentenceDetectionController.java | 15 ++++++++++--
.../SentenceDetectionParameters.java | 8 ++++++-
.../jvm/processor/tokenizer/Tokenizer.java | 22 +++++++++++------
.../processor/tokenizer/TokenizerController.java | 15 ++++++++++--
.../processor/tokenizer/TokenizerParameters.java | 8 ++++++-
.../src/main/resources/chunker-en.bin | Bin 2560304 -> 0 bytes
.../src/main/resources/language-detection.bin | Bin 10568188 -> 0 bytes
.../documentation.md | 2 ++
.../strings.en | 3 +++
.../documentation.md | 2 ++
.../strings.en | 3 +++
.../documentation.md | 1 +
.../strings.en | 1 +
.../documentation.md | 2 ++
.../strings.en | 3 +++
.../documentation.md | 2 ++
.../strings.en | 3 +++
.../documentation.md | 2 ++
.../strings.en | 3 +++
.../src/main/resources/partofspeech-en-v2.bin | Bin 5696197 -> 0 bytes
.../src/main/resources/sentence-detection-en.bin | Bin 98533 -> 0 bytes
.../src/main/resources/tokenizer-en.bin | Bin 439890 -> 0 bytes
34 files changed, 207 insertions(+), 49 deletions(-)
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/chunker/Chunker.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/chunker/Chunker.java
index 96a31e9..dbbcf64 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/chunker/Chunker.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/chunker/Chunker.java
@@ -20,6 +20,7 @@ package org.apache.streampipes.processors.textmining.jvm.processor.chunker;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
+import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.util.Span;
import org.apache.streampipes.commons.exceptions.SpRuntimeException;
import org.apache.streampipes.logging.api.Logger;
@@ -30,6 +31,7 @@ import org.apache.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.apache.streampipes.wrapper.routing.SpOutputCollector;
import org.apache.streampipes.wrapper.runtime.EventProcessor;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
@@ -44,21 +46,31 @@ public class Chunker implements EventProcessor<ChunkerParameters> {
private ChunkerME chunker;
public Chunker() {
- try (InputStream modelIn = getClass().getClassLoader().getResourceAsStream("chunker-en.bin")) {
- ChunkerModel model = new ChunkerModel(modelIn);
- chunker = new ChunkerME(model);
- } catch (IOException e) {
- e.printStackTrace();
- }
+// try (InputStream modelIn = getClass().getClassLoader().getResourceAsStream("chunker-en.bin")) {
+// ChunkerModel model = new ChunkerModel(modelIn);
+// chunker = new ChunkerME(model);
+// } catch (IOException e) {
+// e.printStackTrace();
+// }
}
@Override
public void onInvocation(ChunkerParameters chunkerParameters,
SpOutputCollector spOutputCollector,
- EventProcessorRuntimeContext runtimeContext) {
+ EventProcessorRuntimeContext runtimeContext) throws SpRuntimeException {
LOG = chunkerParameters.getGraph().getLogger(Chunker.class);
this.tags = chunkerParameters.getTags();
this.tokens = chunkerParameters.getTokens();
+
+ InputStream modelIn = new ByteArrayInputStream(chunkerParameters.getFileContent());
+ ChunkerModel model = null;
+ try {
+ model = new ChunkerModel(modelIn);
+ } catch (IOException e) {
+ throw new SpRuntimeException("Error when loading the uploaded model.", e);
+ }
+
+ chunker = new ChunkerME(model);
}
@Override
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/chunker/ChunkerController.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/chunker/ChunkerController.java
index c649471..1fe7c92 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/chunker/ChunkerController.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/chunker/ChunkerController.java
@@ -31,12 +31,15 @@ import org.apache.streampipes.sdk.utils.Datatypes;
import org.apache.streampipes.wrapper.standalone.ConfiguredEventProcessor;
import org.apache.streampipes.wrapper.standalone.declarer.StandaloneEventProcessingDeclarer;
+import java.io.IOException;
+
public class ChunkerController extends StandaloneEventProcessingDeclarer<ChunkerParameters> {
private static final String TAGS_FIELD_KEY = "tagsField";
private static final String TOKENS_FIELD_KEY = "tokensField";
static final String CHUNK_TYPE_FIELD_KEY = "chunkType";
static final String CHUNK_FIELD_KEY = "chunk";
+ private static final String BINARY_FILE_KEY = "binary-file";
@Override
public DataProcessorDescription declareModel() {
@@ -44,6 +47,7 @@ public class ChunkerController extends StandaloneEventProcessingDeclarer<Chunker
.category(DataProcessorType.ENRICH_TEXT)
.withAssets(Assets.DOCUMENTATION, Assets.ICON)
.withLocales(Locales.EN)
+ .requiredFile(Labels.withId(BINARY_FILE_KEY))
.requiredStream(StreamRequirementsBuilder
.create()
.requiredPropertyWithUnaryMapping(
@@ -73,7 +77,14 @@ public class ChunkerController extends StandaloneEventProcessingDeclarer<Chunker
String tags = extractor.mappingPropertyValue(TAGS_FIELD_KEY);
String tokens = extractor.mappingPropertyValue(TOKENS_FIELD_KEY);
- ChunkerParameters params = new ChunkerParameters(graph, tags, tokens);
+ byte[] fileContent = null;
+ try {
+ fileContent = extractor.fileContentsAsByteArray(BINARY_FILE_KEY);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ ChunkerParameters params = new ChunkerParameters(graph, tags, tokens, fileContent);
return new ConfiguredEventProcessor<>(params, Chunker::new);
}
}
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/chunker/ChunkerParameters.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/chunker/ChunkerParameters.java
index c0870cd..8c15bd0 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/chunker/ChunkerParameters.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/chunker/ChunkerParameters.java
@@ -24,12 +24,14 @@ import org.apache.streampipes.wrapper.params.binding.EventProcessorBindingParams
public class ChunkerParameters extends EventProcessorBindingParams {
private String tags;
private String tokens;
+ private byte[] fileContent;
- public ChunkerParameters(DataProcessorInvocation graph, String tags, String tokens)
+ public ChunkerParameters(DataProcessorInvocation graph, String tags, String tokens, byte[] fileContent)
{
super(graph);
this.tags = tags;
this.tokens = tokens;
+ this.fileContent = fileContent;
}
public String getTags() {
@@ -39,4 +41,8 @@ public class ChunkerParameters extends EventProcessorBindingParams {
public String getTokens() {
return tokens;
}
+
+ public byte[] getFileContent() {
+ return fileContent;
+ }
}
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/language/LanguageDetection.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/language/LanguageDetection.java
index af03e70..d0e3dea 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/language/LanguageDetection.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/language/LanguageDetection.java
@@ -22,12 +22,16 @@ import opennlp.tools.langdetect.Language;
import opennlp.tools.langdetect.LanguageDetector;
import opennlp.tools.langdetect.LanguageDetectorME;
import opennlp.tools.langdetect.LanguageDetectorModel;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import org.apache.streampipes.commons.exceptions.SpRuntimeException;
import org.apache.streampipes.logging.api.Logger;
import org.apache.streampipes.model.runtime.Event;
import org.apache.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.apache.streampipes.wrapper.routing.SpOutputCollector;
import org.apache.streampipes.wrapper.runtime.EventProcessor;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -39,20 +43,24 @@ public class LanguageDetection implements EventProcessor<LanguageDetectionParame
private LanguageDetector languageDetector;
public LanguageDetection() {
- try (InputStream modelIn = getClass().getClassLoader().getResourceAsStream("language-detection.bin")) {
- LanguageDetectorModel model = new LanguageDetectorModel(modelIn);
- languageDetector = new LanguageDetectorME(model);
- } catch (IOException e) {
- e.printStackTrace();
- }
}
@Override
public void onInvocation(LanguageDetectionParameters languageDetectionParameters,
SpOutputCollector spOutputCollector,
- EventProcessorRuntimeContext runtimeContext) {
+ EventProcessorRuntimeContext runtimeContext) throws SpRuntimeException {
LOG = languageDetectionParameters.getGraph().getLogger(LanguageDetection.class);
this.detection = languageDetectionParameters.getDetectionName();
+
+ InputStream modelIn = new ByteArrayInputStream(languageDetectionParameters.getFileContent());
+ LanguageDetectorModel model = null;
+ try {
+ model = new LanguageDetectorModel(modelIn);
+ } catch (IOException e) {
+ throw new SpRuntimeException("Error when loading the uploaded model.", e);
+ }
+
+ languageDetector = new LanguageDetectorME(model);
}
@Override
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/language/LanguageDetectionController.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/language/LanguageDetectionController.java
index 6bc1b2a..be3cf0b 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/language/LanguageDetectionController.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/language/LanguageDetectionController.java
@@ -30,18 +30,22 @@ import org.apache.streampipes.sdk.utils.Assets;
import org.apache.streampipes.wrapper.standalone.ConfiguredEventProcessor;
import org.apache.streampipes.wrapper.standalone.declarer.StandaloneEventProcessingDeclarer;
+import java.io.IOException;
+
public class LanguageDetectionController extends StandaloneEventProcessingDeclarer<LanguageDetectionParameters> {
private static final String DETECTION_FIELD_KEY = "detectionField";
static final String LANGUAGE_KEY = "language";
static final String CONFIDENCE_KEY = "confidenceLanguage";
+ private static final String BINARY_FILE_KEY = "binary-file";
@Override
public DataProcessorDescription declareModel() {
return ProcessingElementBuilder.create("org.apache.streampipes.processors.textmining.jvm.languagedetection")
.category(DataProcessorType.ENRICH_TEXT)
- .withAssets(Assets.DOCUMENTATION)
+ .withAssets(Assets.DOCUMENTATION, Assets.ICON)
.withLocales(Locales.EN)
+ .requiredFile(Labels.withId(BINARY_FILE_KEY))
.requiredStream(StreamRequirementsBuilder
.create()
.requiredPropertyWithUnaryMapping(
@@ -66,7 +70,14 @@ public class LanguageDetectionController extends StandaloneEventProcessingDeclar
String detection = extractor.mappingPropertyValue(DETECTION_FIELD_KEY);
- LanguageDetectionParameters params = new LanguageDetectionParameters(graph, detection);
+ byte[] fileContent = null;
+ try {
+ fileContent = extractor.fileContentsAsByteArray(BINARY_FILE_KEY);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ LanguageDetectionParameters params = new LanguageDetectionParameters(graph, detection, fileContent);
return new ConfiguredEventProcessor<>(params, LanguageDetection::new);
}
}
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/language/LanguageDetectionParameters.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/language/LanguageDetectionParameters.java
index 3b7b034..2860386 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/language/LanguageDetectionParameters.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/language/LanguageDetectionParameters.java
@@ -22,15 +22,21 @@ import org.apache.streampipes.model.graph.DataProcessorInvocation;
import org.apache.streampipes.wrapper.params.binding.EventProcessorBindingParams;
public class LanguageDetectionParameters extends EventProcessorBindingParams {
+ private byte[] fileContent;
private String detectionName;
- public LanguageDetectionParameters(DataProcessorInvocation graph, String fieldName)
+ public LanguageDetectionParameters(DataProcessorInvocation graph, String fieldName, byte[] fileContent)
{
super(graph);
this.detectionName = fieldName;
+ this.fileContent = fileContent;
}
public String getDetectionName() {
return detectionName;
}
+
+ public byte[] getFileContent() {
+ return fileContent;
+ }
}
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/namefinder/NameFinderController.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/namefinder/NameFinderController.java
index 67898d2..12a9489 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/namefinder/NameFinderController.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/namefinder/NameFinderController.java
@@ -36,6 +36,7 @@ import org.apache.streampipes.wrapper.standalone.ConfiguredEventProcessor;
import org.apache.streampipes.wrapper.standalone.declarer.StandaloneEventProcessingDeclarer;
import java.io.File;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/namefinder/NameFinderParameters.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/namefinder/NameFinderParameters.java
index 9a5f2b7..710fff6 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/namefinder/NameFinderParameters.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/namefinder/NameFinderParameters.java
@@ -38,4 +38,5 @@ public class NameFinderParameters extends EventProcessorBindingParams {
public String getModel() {
return model;
}
+
}
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/partofspeech/PartOfSpeech.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/partofspeech/PartOfSpeech.java
index d0751ae..2b2d2a9 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/partofspeech/PartOfSpeech.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/partofspeech/PartOfSpeech.java
@@ -18,8 +18,11 @@
package org.apache.streampipes.processors.textmining.jvm.processor.partofspeech;
+import opennlp.tools.langdetect.LanguageDetectorME;
+import opennlp.tools.langdetect.LanguageDetectorModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
+import org.apache.streampipes.commons.exceptions.SpRuntimeException;
import org.apache.streampipes.logging.api.Logger;
import org.apache.streampipes.model.runtime.Event;
import org.apache.streampipes.model.runtime.field.ListField;
@@ -27,6 +30,7 @@ import org.apache.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.apache.streampipes.wrapper.routing.SpOutputCollector;
import org.apache.streampipes.wrapper.runtime.EventProcessor;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -38,20 +42,24 @@ public class PartOfSpeech implements EventProcessor<PartOfSpeechParameters> {
private POSTaggerME posTagger;
public PartOfSpeech() {
- try (InputStream modelIn = getClass().getClassLoader().getResourceAsStream("partofspeech-en-v2.bin")) {
- POSModel model = new POSModel(modelIn);
- posTagger = new POSTaggerME(model);
- } catch (IOException e) {
- e.printStackTrace();
- }
}
@Override
public void onInvocation(PartOfSpeechParameters partOfSpeechParameters,
SpOutputCollector spOutputCollector,
- EventProcessorRuntimeContext runtimeContext) {
+ EventProcessorRuntimeContext runtimeContext) throws SpRuntimeException {
LOG = partOfSpeechParameters.getGraph().getLogger(PartOfSpeech.class);
this.detection = partOfSpeechParameters.getDetectionName();
+
+ InputStream modelIn = new ByteArrayInputStream(partOfSpeechParameters.getFileContent());
+ POSModel model = null;
+ try {
+ model = new POSModel(modelIn);
+ } catch (IOException e) {
+ throw new SpRuntimeException("Error when loading the uploaded model.", e);
+ }
+
+ posTagger = new POSTaggerME(model);
}
@Override
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/partofspeech/PartOfSpeechController.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/partofspeech/PartOfSpeechController.java
index d519294..d2ea1c9 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/partofspeech/PartOfSpeechController.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/partofspeech/PartOfSpeechController.java
@@ -31,18 +31,22 @@ import org.apache.streampipes.sdk.utils.Datatypes;
import org.apache.streampipes.wrapper.standalone.ConfiguredEventProcessor;
import org.apache.streampipes.wrapper.standalone.declarer.StandaloneEventProcessingDeclarer;
+import java.io.IOException;
+
public class PartOfSpeechController extends StandaloneEventProcessingDeclarer<PartOfSpeechParameters> {
private static final String DETECTION_FIELD_KEY = "detectionField";
static final String CONFIDENCE_KEY = "confidencePos";
static final String TAG_KEY = "tagPos";
+ private static final String BINARY_FILE_KEY = "binary-file";
@Override
public DataProcessorDescription declareModel() {
return ProcessingElementBuilder.create("org.apache.streampipes.processors.textmining.jvm.partofspeech")
.category(DataProcessorType.ENRICH_TEXT)
- .withAssets(Assets.DOCUMENTATION)
+ .withAssets(Assets.DOCUMENTATION, Assets.ICON)
.withLocales(Locales.EN)
+ .requiredFile(Labels.withId(BINARY_FILE_KEY))
.requiredStream(StreamRequirementsBuilder
.create()
.requiredPropertyWithUnaryMapping(
@@ -67,7 +71,14 @@ public class PartOfSpeechController extends StandaloneEventProcessingDeclarer<Pa
String detection = extractor.mappingPropertyValue(DETECTION_FIELD_KEY);
- PartOfSpeechParameters params = new PartOfSpeechParameters(graph, detection);
+ byte[] fileContent = null;
+ try {
+ fileContent = extractor.fileContentsAsByteArray(BINARY_FILE_KEY);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ PartOfSpeechParameters params = new PartOfSpeechParameters(graph, detection, fileContent);
return new ConfiguredEventProcessor<>(params, PartOfSpeech::new);
}
}
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/partofspeech/PartOfSpeechParameters.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/partofspeech/PartOfSpeechParameters.java
index 5b98671..1ad3374 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/partofspeech/PartOfSpeechParameters.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/partofspeech/PartOfSpeechParameters.java
@@ -22,15 +22,22 @@ import org.apache.streampipes.model.graph.DataProcessorInvocation;
import org.apache.streampipes.wrapper.params.binding.EventProcessorBindingParams;
public class PartOfSpeechParameters extends EventProcessorBindingParams {
+
+ private byte[] fileContent;
private String detectionName;
- public PartOfSpeechParameters(DataProcessorInvocation graph, String fieldName)
+ public PartOfSpeechParameters(DataProcessorInvocation graph, String fieldName, byte[] fileContent)
{
super(graph);
this.detectionName = fieldName;
+ this.fileContent = fileContent;
}
public String getDetectionName() {
return detectionName;
}
+
+ public byte[] getFileContent() {
+ return fileContent;
+ }
}
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/sentencedetection/SentenceDetection.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/sentencedetection/SentenceDetection.java
index c680a36..0f0496d 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/sentencedetection/SentenceDetection.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/sentencedetection/SentenceDetection.java
@@ -20,12 +20,15 @@ package org.apache.streampipes.processors.textmining.jvm.processor.sentencedetec
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
+import org.apache.streampipes.commons.exceptions.SpException;
+import org.apache.streampipes.commons.exceptions.SpRuntimeException;
import org.apache.streampipes.logging.api.Logger;
import org.apache.streampipes.model.runtime.Event;
import org.apache.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.apache.streampipes.wrapper.routing.SpOutputCollector;
import org.apache.streampipes.wrapper.runtime.EventProcessor;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -38,20 +41,24 @@ public class SentenceDetection implements EventProcessor<SentenceDetectionParame
private SentenceDetectorME sentenceDetector ;
public SentenceDetection() {
- try (InputStream modelIn = getClass().getClassLoader().getResourceAsStream("sentence-detection-en.bin")) {
- SentenceModel model = new SentenceModel(modelIn);
- sentenceDetector = new SentenceDetectorME(model);
- } catch (IOException e) {
- e.printStackTrace();
- }
}
@Override
public void onInvocation(SentenceDetectionParameters sentenceDetectionParameters,
SpOutputCollector spOutputCollector,
- EventProcessorRuntimeContext runtimeContext) {
+ EventProcessorRuntimeContext runtimeContext) throws SpRuntimeException {
LOG = sentenceDetectionParameters.getGraph().getLogger(SentenceDetection.class);
this.detection = sentenceDetectionParameters.getDetectionName();
+
+ InputStream modelIn = new ByteArrayInputStream(sentenceDetectionParameters.getFileContent());
+ SentenceModel model = null;
+ try {
+ model = new SentenceModel(modelIn);
+ } catch (IOException e) {
+ throw new SpRuntimeException("Error when loading the uploaded model.", e);
+ }
+
+ sentenceDetector = new SentenceDetectorME(model);
}
@Override
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/sentencedetection/SentenceDetectionController.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/sentencedetection/SentenceDetectionController.java
index 5d33776..6516bd0 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/sentencedetection/SentenceDetectionController.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/sentencedetection/SentenceDetectionController.java
@@ -30,16 +30,20 @@ import org.apache.streampipes.sdk.utils.Assets;
import org.apache.streampipes.wrapper.standalone.ConfiguredEventProcessor;
import org.apache.streampipes.wrapper.standalone.declarer.StandaloneEventProcessingDeclarer;
+import java.io.IOException;
+
public class SentenceDetectionController extends StandaloneEventProcessingDeclarer<SentenceDetectionParameters> {
private static final String DETECTION_FIELD_KEY = "detectionField";
+ private static final String BINARY_FILE_KEY = "binary-file";
@Override
public DataProcessorDescription declareModel() {
return ProcessingElementBuilder.create("org.apache.streampipes.processors.textmining.jvm.sentencedetection")
.category(DataProcessorType.ENRICH_TEXT)
- .withAssets(Assets.DOCUMENTATION)
+ .withAssets(Assets.DOCUMENTATION, Assets.ICON)
.withLocales(Locales.EN)
+ .requiredFile(Labels.withId(BINARY_FILE_KEY))
.requiredStream(StreamRequirementsBuilder
.create()
.requiredPropertyWithUnaryMapping(
@@ -56,7 +60,14 @@ public class SentenceDetectionController extends StandaloneEventProcessingDeclar
String detection = extractor.mappingPropertyValue(DETECTION_FIELD_KEY);
- SentenceDetectionParameters params = new SentenceDetectionParameters(graph, detection);
+ byte[] fileContent = null;
+ try {
+ fileContent = extractor.fileContentsAsByteArray(BINARY_FILE_KEY);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ SentenceDetectionParameters params = new SentenceDetectionParameters(graph, detection, fileContent);
return new ConfiguredEventProcessor<>(params, SentenceDetection::new);
}
}
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/sentencedetection/SentenceDetectionParameters.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/sentencedetection/SentenceDetectionParameters.java
index fb2ae2c..2163683 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/sentencedetection/SentenceDetectionParameters.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/sentencedetection/SentenceDetectionParameters.java
@@ -22,15 +22,21 @@ import org.apache.streampipes.model.graph.DataProcessorInvocation;
import org.apache.streampipes.wrapper.params.binding.EventProcessorBindingParams;
public class SentenceDetectionParameters extends EventProcessorBindingParams {
+ private byte[] fileContent;
private String detectionName;
- public SentenceDetectionParameters(DataProcessorInvocation graph, String fieldName)
+ public SentenceDetectionParameters(DataProcessorInvocation graph, String fieldName, byte[] fileContent)
{
super(graph);
this.detectionName = fieldName;
+ this.fileContent = fileContent;
}
public String getDetectionName() {
return detectionName;
}
+
+ public byte[] getFileContent() {
+ return fileContent;
+ }
}
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/tokenizer/Tokenizer.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/tokenizer/Tokenizer.java
index d2e9bf3..b456b36 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/tokenizer/Tokenizer.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/tokenizer/Tokenizer.java
@@ -18,14 +18,18 @@
package org.apache.streampipes.processors.textmining.jvm.processor.tokenizer;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
+import org.apache.streampipes.commons.exceptions.SpRuntimeException;
import org.apache.streampipes.logging.api.Logger;
import org.apache.streampipes.model.runtime.Event;
import org.apache.streampipes.wrapper.context.EventProcessorRuntimeContext;
import org.apache.streampipes.wrapper.routing.SpOutputCollector;
import org.apache.streampipes.wrapper.runtime.EventProcessor;
+import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -38,20 +42,24 @@ public class Tokenizer implements EventProcessor<TokenizerParameters> {
private TokenizerME tokenizer;
public Tokenizer() {
- try (InputStream modelIn = getClass().getClassLoader().getResourceAsStream("tokenizer-en.bin")) {
- TokenizerModel model = new TokenizerModel(modelIn);
- tokenizer = new TokenizerME(model);
- } catch (IOException e) {
- e.printStackTrace();
- }
}
@Override
public void onInvocation(TokenizerParameters tokenizerParameters,
SpOutputCollector spOutputCollector,
- EventProcessorRuntimeContext runtimeContext) {
+ EventProcessorRuntimeContext runtimeContext) throws SpRuntimeException {
LOG = tokenizerParameters.getGraph().getLogger(Tokenizer.class);
this.detection = tokenizerParameters.getDetectionName();
+
+ InputStream modelIn = new ByteArrayInputStream(tokenizerParameters.getFileContent());
+ TokenizerModel model = null;
+ try {
+ model = new TokenizerModel(modelIn);
+ } catch (IOException e) {
+ throw new SpRuntimeException("Error when loading the uploaded model.", e);
+ }
+
+ tokenizer = new TokenizerME(model);
}
@Override
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/tokenizer/TokenizerController.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/tokenizer/TokenizerController.java
index 4ab0f4f..dececa2 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/tokenizer/TokenizerController.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/tokenizer/TokenizerController.java
@@ -30,18 +30,22 @@ import org.apache.streampipes.sdk.utils.Assets;
import org.apache.streampipes.wrapper.standalone.ConfiguredEventProcessor;
import org.apache.streampipes.wrapper.standalone.declarer.StandaloneEventProcessingDeclarer;
+import java.io.IOException;
+
public class TokenizerController extends StandaloneEventProcessingDeclarer<TokenizerParameters> {
private static final String DETECTION_FIELD_KEY = "detectionField";
static final String TOKEN_LIST_FIELD_KEY = "tokenList";
+ private static final String BINARY_FILE_KEY = "binary-file";
//TODO: Maybe change outputStrategy to an array instead of tons of different strings
@Override
public DataProcessorDescription declareModel() {
return ProcessingElementBuilder.create("org.apache.streampipes.processors.textmining.jvm.tokenizer")
.category(DataProcessorType.ENRICH_TEXT)
- .withAssets(Assets.DOCUMENTATION)
+ .withAssets(Assets.DOCUMENTATION, Assets.ICON)
.withLocales(Locales.EN)
+ .requiredFile(Labels.withId(BINARY_FILE_KEY))
.requiredStream(StreamRequirementsBuilder
.create()
.requiredPropertyWithUnaryMapping(
@@ -60,7 +64,14 @@ public class TokenizerController extends StandaloneEventProcessingDeclarer<Token
String detection = extractor.mappingPropertyValue(DETECTION_FIELD_KEY);
- TokenizerParameters params = new TokenizerParameters(graph, detection);
+ byte[] fileContent = null;
+ try {
+ fileContent = extractor.fileContentsAsByteArray(BINARY_FILE_KEY);
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ TokenizerParameters params = new TokenizerParameters(graph, detection, fileContent);
return new ConfiguredEventProcessor<>(params, Tokenizer::new);
}
}
diff --git a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/tokenizer/TokenizerParameters.java b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/tokenizer/TokenizerParameters.java
index 7d4537d..3c76cae 100644
--- a/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/tokenizer/TokenizerParameters.java
+++ b/streampipes-processors-text-mining-jvm/src/main/java/org/apache/streampipes/processors/textmining/jvm/processor/tokenizer/TokenizerParameters.java
@@ -22,15 +22,21 @@ import org.apache.streampipes.model.graph.DataProcessorInvocation;
import org.apache.streampipes.wrapper.params.binding.EventProcessorBindingParams;
public class TokenizerParameters extends EventProcessorBindingParams {
+ private byte[] fileContent;
private String detectionName;
- public TokenizerParameters(DataProcessorInvocation graph, String fieldName)
+ public TokenizerParameters(DataProcessorInvocation graph, String fieldName, byte[] fileContent)
{
super(graph);
this.detectionName = fieldName;
+ this.fileContent = fileContent;
}
public String getDetectionName() {
return detectionName;
}
+
+ public byte[] getFileContent() {
+ return fileContent;
+ }
}
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/chunker-en.bin b/streampipes-processors-text-mining-jvm/src/main/resources/chunker-en.bin
deleted file mode 100644
index 65d9356..0000000
Binary files a/streampipes-processors-text-mining-jvm/src/main/resources/chunker-en.bin and /dev/null differ
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/language-detection.bin b/streampipes-processors-text-mining-jvm/src/main/resources/language-detection.bin
deleted file mode 100644
index 0b4ea89..0000000
Binary files a/streampipes-processors-text-mining-jvm/src/main/resources/language-detection.bin and /dev/null differ
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.chunker/documentation.md b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.chunker/documentation.md
index fa5a1f0..623232e 100644
--- a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.chunker/documentation.md
+++ b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.chunker/documentation.md
@@ -41,6 +41,8 @@ Needs a stream with two string list properties:
## Configuration
Assign the tokens and the part of speech tags to the corresponding stream property.
+To use this component, you have to download or train an OpenNLP model:
+https://opennlp.apache.org/models.html
## Output
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.chunker/strings.en b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.chunker/strings.en
index 31a9d95..5c786e5 100644
--- a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.chunker/strings.en
+++ b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.chunker/strings.en
@@ -12,3 +12,6 @@ confidence.description=The confidence with which each Chunk is tagged
chunk.title=Chunk
chunk.description=The chunk each token belongs to
+
+binary-file.title=Model File
+binary-file.description=Provide a model from the OpenNLP project. You can find the link in the documentation of this processor.
\ No newline at end of file
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.languagedetection/documentation.md b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.languagedetection/documentation.md
index c3025a7..76eb0b9 100644
--- a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.languagedetection/documentation.md
+++ b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.languagedetection/documentation.md
@@ -146,6 +146,8 @@ The longer the text, the higher the accuracy of the language detector.
## Configuration
Simply assign the correct output of the previous stream to the language detector input.
+To use this component, you have to download or train an OpenNLP model:
+https://opennlp.apache.org/models.html
## Output
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.languagedetection/strings.en b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.languagedetection/strings.en
index 6f3d4d2..527114a 100644
--- a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.languagedetection/strings.en
+++ b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.languagedetection/strings.en
@@ -9,3 +9,6 @@ language.description=The acronym of the detected language
confidence.title=Confidence
confidence.description=The probability that the detected language is correct. Between 0 (no confidence) and 1 (highly confident).
+
+binary-file.title=Model File
+binary-file.description=Provide a model from the OpenNLP project. You can find the link in the documentation of this processor.
\ No newline at end of file
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.namefinder/documentation.md b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.namefinder/documentation.md
index f78dc2b..3fbcb79 100644
--- a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.namefinder/documentation.md
+++ b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.namefinder/documentation.md
@@ -43,6 +43,7 @@ A stream with a list of tokens from a text.
Configure the Name finder so that the tokens are assigned to the "List of Tokens" property
+
#### Model parameter
The trained model which should be used to find the names.
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.namefinder/strings.en b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.namefinder/strings.en
index 1189438..944a95b 100644
--- a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.namefinder/strings.en
+++ b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.namefinder/strings.en
@@ -9,3 +9,4 @@ tokensField.description=List of the tokens of the document
foundNames.title=Found Names
foundNames.description=The found names in the tokens
+
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.partofspeech/documentation.md b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.partofspeech/documentation.md
index d1e3691..1f05c4b 100644
--- a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.partofspeech/documentation.md
+++ b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.partofspeech/documentation.md
@@ -40,6 +40,8 @@ A stream with a list property which contains the tokens.
## Configuration
Simply assign the correct output of the previous stream to the part of speech detector input.
+To use this component, you have to download or train an OpenNLP model:
+https://opennlp.apache.org/models.html
## Output
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.partofspeech/strings.en b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.partofspeech/strings.en
index de11a52..7d30a14 100644
--- a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.partofspeech/strings.en
+++ b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.partofspeech/strings.en
@@ -9,3 +9,6 @@ tagPos.description=The part of speech tag which was detected
confidencePos.title=Confidence
confidencePos.description=The probability that the detected part of speech is correct. Between 0 (no confidence) and 1 (highly confident).
+
+binary-file.title=Model File
+binary-file.description=Provide a model from the OpenNLP project. You can find the link in the documentation of this processor.
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.sentencedetection/documentation.md b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.sentencedetection/documentation.md
index 9a22eb7..30b0fee 100644
--- a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.sentencedetection/documentation.md
+++ b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.sentencedetection/documentation.md
@@ -39,6 +39,8 @@ A stream with a string property which contains a text.
## Configuration
Simply assign the correct output of the previous stream to the tokenizer input.
+To use this component, you have to download or train an OpenNLP model:
+https://opennlp.apache.org/models.html
## Output
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.sentencedetection/strings.en b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.sentencedetection/strings.en
index 3d79fe2..acaf34e 100644
--- a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.sentencedetection/strings.en
+++ b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.sentencedetection/strings.en
@@ -3,3 +3,6 @@ org.apache.streampipes.processors.textmining.jvm.sentencedetection.description=S
detectionField.title=Sentence detection
detectionField.description=Specifies the event property from your stream that contains the text that should be split up
+
+binary-file.title=Model File
+binary-file.description=Provide a model from the OpenNLP project. You can find the link in the documentation of this processor.
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.tokenizer/documentation.md b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.tokenizer/documentation.md
index 61ac3a4..658d61c 100644
--- a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.tokenizer/documentation.md
+++ b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.tokenizer/documentation.md
@@ -39,6 +39,8 @@ A stream with a string property which contains a text.
## Configuration
Simply assign the correct output of the previous stream to the tokenizer input.
+To use this component, you have to download or train an OpenNLP model:
+https://opennlp.apache.org/models.html
## Output
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.tokenizer/strings.en b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.tokenizer/strings.en
index 105691c..acd4029 100644
--- a/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.tokenizer/strings.en
+++ b/streampipes-processors-text-mining-jvm/src/main/resources/org.apache.streampipes.processors.textmining.jvm.tokenizer/strings.en
@@ -6,3 +6,6 @@ detectionField.description=Specifies the event property of your stream that cont
tokenList.title=List of Tokens
tokenList.description=List of all tokens in the text
+
+binary-file.title=Model File
+binary-file.description=Provide a model from the OpenNLP project. You can find the link in the documentation of this processor.
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/partofspeech-en-v2.bin b/streampipes-processors-text-mining-jvm/src/main/resources/partofspeech-en-v2.bin
deleted file mode 100644
index c8cae23..0000000
Binary files a/streampipes-processors-text-mining-jvm/src/main/resources/partofspeech-en-v2.bin and /dev/null differ
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/sentence-detection-en.bin b/streampipes-processors-text-mining-jvm/src/main/resources/sentence-detection-en.bin
deleted file mode 100644
index e89076b..0000000
Binary files a/streampipes-processors-text-mining-jvm/src/main/resources/sentence-detection-en.bin and /dev/null differ
diff --git a/streampipes-processors-text-mining-jvm/src/main/resources/tokenizer-en.bin b/streampipes-processors-text-mining-jvm/src/main/resources/tokenizer-en.bin
deleted file mode 100644
index c417277..0000000
Binary files a/streampipes-processors-text-mining-jvm/src/main/resources/tokenizer-en.bin and /dev/null differ