You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jk...@apache.org on 2011/11/17 04:16:01 UTC
svn commit: r1203036 - in
/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats:
BioNLP2004NameSampleStream.java Conll02NameSampleStream.java
Conll03NameSampleStream.java ConllXPOSSampleStream.java
ConllXPOSSampleStreamFactory.java
Author: jkosin
Date: Thu Nov 17 03:16:00 2011
New Revision: 1203036
URL: http://svn.apache.org/viewvc?rev=1203036&view=rev
Log:
OPENNLP-367: ConllX is always UTF-8 and its encoding is handled by the factory; Conll02 is UTF-8; Conll03 is ISO-8859-1. System.out is now set to the same encoding as the input. We should provide a warning that the encoding may make the output illegible on the native system, and that the output needs to be piped or redirected to a file in all cases.
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java?rev=1203036&r1=1203035&r2=1203036&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java Thu Nov 17 03:16:00 2011
@@ -19,6 +19,7 @@ package opennlp.tools.formats;
import java.io.IOException;
import java.io.InputStream;
+import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
@@ -54,6 +55,7 @@ public class BioNLP2004NameSampleStream
public BioNLP2004NameSampleStream(InputStream in, int types) {
try {
this.lineStream = new PlainTextByLineStream(in, "UTF-8");
+ System.setOut(new PrintStream(System.out, true, "UTF-8"));
} catch (UnsupportedEncodingException e) {
// UTF-8 is available on all JVMs, will never happen
throw new IllegalStateException(e);
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java?rev=1203036&r1=1203035&r2=1203036&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java Thu Nov 17 03:16:00 2011
@@ -19,6 +19,7 @@ package opennlp.tools.formats;
import java.io.IOException;
import java.io.InputStream;
+import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
@@ -80,6 +81,7 @@ public class Conll02NameSampleStream imp
this.lang = lang;
try {
this.lineStream = new PlainTextByLineStream(in, "UTF-8");
+ System.setOut(new PrintStream(System.out, true, "UTF-8"));
} catch (UnsupportedEncodingException e) {
// UTF-8 is available on all JVMs, will never happen
throw new IllegalStateException(e);
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java?rev=1203036&r1=1203035&r2=1203036&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java Thu Nov 17 03:16:00 2011
@@ -19,6 +19,7 @@ import static opennlp.tools.formats.Conl
import java.io.IOException;
import java.io.InputStream;
+import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
@@ -67,6 +68,7 @@ public class Conll03NameSampleStream imp
this.lang = lang;
try {
this.lineStream = new PlainTextByLineStream(in, "ISO-8859-1");
+ System.setOut(new PrintStream(System.out, true, "ISO-8859-1"));
} catch (UnsupportedEncodingException e) {
// UTF-8 is available on all JVMs, will never happen
throw new IllegalStateException(e);
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java?rev=1203036&r1=1203035&r2=1203036&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStream.java Thu Nov 17 03:16:00 2011
@@ -46,6 +46,7 @@ public class ConllXPOSSampleStream exten
}
ConllXPOSSampleStream(Reader in) throws IOException {
+ // encoding is handled by the factory...
super(new ParagraphStream(new PlainTextByLineStream(in)));
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java?rev=1203036&r1=1203035&r2=1203036&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXPOSSampleStreamFactory.java Thu Nov 17 03:16:00 2011
@@ -19,6 +19,7 @@ package opennlp.tools.formats;
import java.io.File;
import java.io.InputStreamReader;
+import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import opennlp.tools.cmdline.ArgumentParser;
@@ -38,9 +39,6 @@ public class ConllXPOSSampleStreamFactor
interface Parameters {
@ParameterDescription(valueName = "sampleData")
String getData();
-
- @ParameterDescription(valueName = "charsetName")
- String getEncoding();
}
public String getUsage() {
@@ -55,11 +53,12 @@ public class ConllXPOSSampleStreamFactor
ObjectStream<String> lineStream;
try {
lineStream = new PlainTextByLineStream(new InputStreamReader(
- CmdLineUtil.openInFile(new File(params.getData())), params.getEncoding()));
+ CmdLineUtil.openInFile(new File(params.getData())), "UTF-8"));
+ System.setOut(new PrintStream(System.out, true, "UTF-8"));
return new ConllXPOSSampleStream(lineStream);
} catch (UnsupportedEncodingException e) {
- System.err.println("Encoding not supported: " + params.getEncoding());
+ // this shouldn't happen
throw new TerminateToolException(-1);
}
}