You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/05/19 14:58:14 UTC

[1/2] opennlp git commit: Hook up the langdetect converter to the CLI

Repository: opennlp
Updated Branches:
  refs/heads/LangDetect eb6fb32d1 -> 4a4bf9100


Hook up the langdetect converter to the CLI


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/696c1493
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/696c1493
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/696c1493

Branch: refs/heads/LangDetect
Commit: 696c149359bfc54e709d0f2844dc384a793a6e6e
Parents: eb6fb32
Author: Jörn Kottmann <jo...@apache.org>
Authored: Fri May 19 15:50:47 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Fri May 19 15:55:22 2017 +0200

----------------------------------------------------------------------
 opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java  | 2 ++
 .../formats/leipzig/LeipzigLanguageSampleStreamFactory.java | 9 +++++----
 2 files changed, 7 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/696c1493/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
index 9cef28b..c828e26 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/CLI.java
@@ -37,6 +37,7 @@ import opennlp.tools.cmdline.doccat.DoccatEvaluatorTool;
 import opennlp.tools.cmdline.doccat.DoccatTool;
 import opennlp.tools.cmdline.doccat.DoccatTrainerTool;
 import opennlp.tools.cmdline.entitylinker.EntityLinkerTool;
+import opennlp.tools.cmdline.langdetect.LanguageDetectorConverterTool;
 import opennlp.tools.cmdline.langdetect.LanguageDetectorCrossValidatorTool;
 import opennlp.tools.cmdline.langdetect.LanguageDetectorEvaluatorTool;
 import opennlp.tools.cmdline.langdetect.LanguageDetectorTool;
@@ -97,6 +98,7 @@ public final class CLI {
     // Language Detector
     tools.add(new LanguageDetectorTool());
     tools.add(new LanguageDetectorTrainerTool());
+    tools.add(new LanguageDetectorConverterTool());
     tools.add(new LanguageDetectorCrossValidatorTool());
     tools.add(new LanguageDetectorEvaluatorTool());
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/696c1493/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
index 96b0378..59a7551 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
@@ -42,11 +42,11 @@ public class LeipzigLanguageSampleStreamFactory
 
     @ParameterDescription(valueName = "sentencesPerSample",
         description = "number of sentences per sample")
-    int getSentencesPerSample();
+    String getSentencesPerSample();
 
     @ParameterDescription(valueName = "samplesPerLanguage",
         description = "number of samples per language")
-    int getSamplesPerLanguage();
+    String getSamplesPerLanguage();
   }
 
   protected <P> LeipzigLanguageSampleStreamFactory(Class<P> params) {
@@ -64,8 +64,9 @@ public class LeipzigLanguageSampleStreamFactory
     File sentencesFileDir = params.getSentencesDir();
 
     try {
-      return new LeipzigLanguageSampleStream(sentencesFileDir, params.getSentencesPerSample(),
-          params.getSamplesPerLanguage());
+      return new LeipzigLanguageSampleStream(sentencesFileDir,
+          Integer.parseInt(params.getSentencesPerSample()),
+          Integer.parseInt(params.getSamplesPerLanguage()));
     } catch (IOException e) {
       throw new TerminateToolException(-1, "IO error while opening sample data.", e);
     }


[2/2] opennlp git commit: Remove number and tab from line

Posted by jo...@apache.org.
Remove number and tab from line


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/4a4bf910
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/4a4bf910
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/4a4bf910

Branch: refs/heads/LangDetect
Commit: 4a4bf910022a41a7aa3beb92c4880894723cd20d
Parents: 696c149
Author: Jörn Kottmann <jo...@apache.org>
Authored: Fri May 19 16:35:11 2017 +0200
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Fri May 19 16:35:11 2017 +0200

----------------------------------------------------------------------
 .../tools/formats/leipzig/LeipzigLanguageSampleStream.java      | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/4a4bf910/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java
index 582fb08..6c4d009 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStream.java
@@ -53,6 +53,7 @@ public class LeipzigLanguageSampleStream implements ObjectStream<LanguageSample>
 
     @Override
     public LanguageSample read() throws IOException {
+
       if (sampleCount < numberOfSamples) {
         StringBuilder sampleString = new StringBuilder();
 
@@ -60,8 +61,10 @@ public class LeipzigLanguageSampleStream implements ObjectStream<LanguageSample>
         String line;
         while (count < sentencesPerSample && (line = lineStream.read()) != null) {
 
+          int textStart = line.indexOf('\t') + 1;
+
           // TODO: Should this be changed to contain an array of sample strings?
-          sampleString.append(line + " ");
+          sampleString.append(line.substring(textStart) + " ");
 
           count++;
         }