You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/05/19 14:58:14 UTC
[1/2] opennlp git commit: Hookup the langdetect converter to the cli
Repository: opennlp
Updated Branches:
refs/heads/LangDetect eb6fb32d1 -> 4a4bf9100
Hookup the langdetect converter to the cli
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/696c1493
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/696c1493
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/696c1493
Branch: refs/heads/LangDetect
Commit: 696c149359bfc54e709d0f2844dc384a793a6e6e
Parents: eb6fb32
Author: Jörn Kottmann <jo...@apache.org>
Authored: Fri May 19 15:50:47 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Fri May 19 15:55:22 2017 +0200
----------------------------------------------------------------------
opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java | 2 ++
.../formats/leipzig/LeipzigLanguageSampleStreamFactory.java | 9 +++++----
2 files changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/696c1493/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
index 9cef28b..c828e26 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
@@ -37,6 +37,7 @@ import opennlp.tools.cmdline.doccat.DoccatEvaluatorTool;
import opennlp.tools.cmdline.doccat.DoccatTool;
import opennlp.tools.cmdline.doccat.DoccatTrainerTool;
import opennlp.tools.cmdline.entitylinker.EntityLinkerTool;
+import opennlp.tools.cmdline.langdetect.LanguageDetectorConverterTool;
import opennlp.tools.cmdline.langdetect.LanguageDetectorCrossValidatorTool;
import opennlp.tools.cmdline.langdetect.LanguageDetectorEvaluatorTool;
import opennlp.tools.cmdline.langdetect.LanguageDetectorTool;
@@ -97,6 +98,7 @@ public final class CLI {
// Language Detector
tools.add(new LanguageDetectorTool());
tools.add(new LanguageDetectorTrainerTool());
+ tools.add(new LanguageDetectorConverterTool());
tools.add(new LanguageDetectorCrossValidatorTool());
tools.add(new LanguageDetectorEvaluatorTool());
http://git-wip-us.apache.org/repos/asf/opennlp/blob/696c1493/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
index 96b0378..59a7551 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
@@ -42,11 +42,11 @@ public class LeipzigLanguageSampleStreamFactory
@ParameterDescription(valueName = "sentencesPerSample",
description = "number of sentences per sample")
- int getSentencesPerSample();
+ String getSentencesPerSample();
@ParameterDescription(valueName = "samplesPerLanguage",
description = "number of samples per language")
- int getSamplesPerLanguage();
+ String getSamplesPerLanguage();
}
protected <P> LeipzigLanguageSampleStreamFactory(Class<P> params) {
@@ -64,8 +64,9 @@ public class LeipzigLanguageSampleStreamFactory
File sentencesFileDir = params.getSentencesDir();
try {
- return new LeipzigLanguageSampleStream(sentencesFileDir, params.getSentencesPerSample(),
- params.getSamplesPerLanguage());
+ return new LeipzigLanguageSampleStream(sentencesFileDir,
+ Integer.parseInt(params.getSentencesPerSample()),
+ Integer.parseInt(params.getSamplesPerLanguage()));
} catch (IOException e) {
throw new TerminateToolException(-1, "IO error while opening sample data.", e);
}
[2/2] opennlp git commit: Remove number and tab from line
Posted by jo...@apache.org.
Remove number and tab from line
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/4a4bf910
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/4a4bf910
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/4a4bf910
Branch: refs/heads/LangDetect
Commit: 4a4bf910022a41a7aa3beb92c4880894723cd20d
Parents: 696c149
Author: Jörn Kottmann <jo...@apache.org>
Authored: Fri May 19 16:35:11 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Fri May 19 16:35:11 2017 +0200
----------------------------------------------------------------------
.../tools/formats/leipzig/LeipzigLanguageSampleStream.java | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4bf910/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java
index 582fb08..6c4d009 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java
@@ -53,6 +53,7 @@ public class LeipzigLanguageSampleStream implements ObjectStream<LanguageSample>
@Override
public LanguageSample read() throws IOException {
+
if (sampleCount < numberOfSamples) {
StringBuilder sampleString = new StringBuilder();
@@ -60,8 +61,10 @@ public class LeipzigLanguageSampleStream implements ObjectStream<LanguageSample>
String line;
while (count < sentencesPerSample && (line = lineStream.read()) != null) {
+ int textStart = line.indexOf('\t') + 1;
+
// TODO: It should it be changed to contain an array of sample strings ?!
- sampleString.append(line + " ");
+ sampleString.append(line.substring(textStart) + " ");
count++;
}