You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jk...@apache.org on 2011/11/17 04:16:01 UTC

svn commit: r1203036 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats: BioNLP2004NameSampleStream.java Conll02NameSampleStream.java Conll03NameSampleStream.java ConllXPOSSampleStream.java ConllXPOSSampleStreamFactory.java

Author: jkosin
Date: Thu Nov 17 03:16:00 2011
New Revision: 1203036

URL: http://svn.apache.org/viewvc?rev=1203036&view=rev
Log:
OPENNLP-367: ConllX is always UTF-8 and its encoding is handled by the factory; Conll02 is UTF-8; Conll03 is ISO-8859-1. System.out is now set up to use the same encoding as the input. We should provide a warning that this encoding may make the output illegible on the native system, and that the output needs to be piped or redirected to a file in all cases.

Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java?rev=1203036&r1=1203035&r2=1203036&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java Thu Nov 17 03:16:00 2011
@@ -19,6 +19,7 @@ package opennlp.tools.formats;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PrintStream;
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.List;
@@ -54,6 +55,7 @@ public class BioNLP2004NameSampleStream 
   public BioNLP2004NameSampleStream(InputStream in, int types) {
     try {
       this.lineStream = new PlainTextByLineStream(in, "UTF-8");
+      System.setOut(new PrintStream(System.out, true, "UTF-8"));
     } catch (UnsupportedEncodingException e) {
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java?rev=1203036&r1=1203035&r2=1203036&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java Thu Nov 17 03:16:00 2011
@@ -19,6 +19,7 @@ package opennlp.tools.formats;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PrintStream;
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.List;
@@ -80,6 +81,7 @@ public class Conll02NameSampleStream imp
     this.lang = lang;
     try {
       this.lineStream = new PlainTextByLineStream(in, "UTF-8");
+      System.setOut(new PrintStream(System.out, true, "UTF-8"));
     } catch (UnsupportedEncodingException e) {
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java?rev=1203036&r1=1203035&r2=1203036&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java Thu Nov 17 03:16:00 2011
@@ -19,6 +19,7 @@ import static opennlp.tools.formats.Conl
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.PrintStream;
 import java.io.UnsupportedEncodingException;
 import java.util.ArrayList;
 import java.util.List;
@@ -67,6 +68,7 @@ public class Conll03NameSampleStream imp
     this.lang = lang;
     try {
       this.lineStream = new PlainTextByLineStream(in, "ISO-8859-1");
+      System.setOut(new PrintStream(System.out, true, "ISO-8859-1"));
     } catch (UnsupportedEncodingException e) {
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java?rev=1203036&r1=1203035&r2=1203036&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java Thu Nov 17 03:16:00 2011
@@ -46,6 +46,7 @@ public class ConllXPOSSampleStream exten
   }
   
   ConllXPOSSampleStream(Reader in) throws IOException {
+    // encoding is handled by the factory...
     super(new ParagraphStream(new PlainTextByLineStream(in)));
   }
 

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java?rev=1203036&r1=1203035&r2=1203036&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java Thu Nov 17 03:16:00 2011
@@ -19,6 +19,7 @@ package opennlp.tools.formats;
 
 import java.io.File;
 import java.io.InputStreamReader;
+import java.io.PrintStream;
 import java.io.UnsupportedEncodingException;
 
 import opennlp.tools.cmdline.ArgumentParser;
@@ -38,9 +39,6 @@ public class ConllXPOSSampleStreamFactor
   interface Parameters {
     @ParameterDescription(valueName = "sampleData")
     String getData();
-    
-    @ParameterDescription(valueName = "charsetName")
-    String getEncoding();
   }
   
   public String getUsage() {
@@ -55,11 +53,12 @@ public class ConllXPOSSampleStreamFactor
     ObjectStream<String> lineStream;
     try {
       lineStream = new PlainTextByLineStream(new InputStreamReader(
-          CmdLineUtil.openInFile(new File(params.getData())), params.getEncoding()));
+          CmdLineUtil.openInFile(new File(params.getData())), "UTF-8"));
+      System.setOut(new PrintStream(System.out, true, "UTF-8"));
       
       return new ConllXPOSSampleStream(lineStream);
     } catch (UnsupportedEncodingException e) {
-      System.err.println("Encoding not supported: " + params.getEncoding());
+      // this shouldn't happen
       throw new TerminateToolException(-1);
     }
   }