You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/09/30 13:10:29 UTC
svn commit: r1177597 - in /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats: LeipzigDoccatSampleStream.java LeipzigDocumentSampleStreamFactory.java
Author: joern
Date: Fri Sep 30 11:10:28 2011
New Revision: 1177597
URL: http://svn.apache.org/viewvc?rev=1177597&view=rev
Log:
OPENNLP-305 Replaced the per-language encoding lookup with a fixed UTF-8 encoding and removed the restriction to specific language codes.
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java?rev=1177597&r1=1177596&r2=1177597&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java Fri Sep 30 11:10:28 2011
@@ -52,51 +52,11 @@ public class LeipzigDoccatSampleStream e
*/
LeipzigDoccatSampleStream(String language, int sentencesPerDocument,
InputStream in) throws IOException {
- super(new PlainTextByLineStream(in, mapLanguageToEncoding(language)));
+ super(new PlainTextByLineStream(in, "UTF-8"));
this.language = language;
this.sentencesPerDocument = sentencesPerDocument;
}
- /**
- * Maps the language to the file encoding, if the encoding
- * cannot be specified an IOException is thrown.
- *
- * @return
- * @throws IOException
- */
- private static String mapLanguageToEncoding(String language) throws IOException {
-
- if (language == null)
- throw new NullPointerException("language parameter must not be null!");
-
-
- Map<String, String> encodingMap = new HashMap<String, String>();
- encodingMap.put("cat", "ISO-8859-1");
- encodingMap.put("de", "ISO-8859-1");
- encodingMap.put("dk", "ISO-8859-1");
- encodingMap.put("ee", "ISO-8859-4");
- encodingMap.put("en", "ISO-8859-1");
- encodingMap.put("fi", "ISO-8859-1");
- encodingMap.put("fr", "ISO-8859-1");
- encodingMap.put("it", "ISO-8859-1");
- encodingMap.put("jp", "UTF-8");
- encodingMap.put("kr", "UTF-8");
- encodingMap.put("nl", "ISO-8859-1");
- encodingMap.put("no", "ISO-8859-1");
- encodingMap.put("se", "ISO-8859-1");
- encodingMap.put("sorb", "ISO-8859-2");
- encodingMap.put("tr", "ISO-8859-9");
-
- String encoding = encodingMap.get(language);
-
- if (encoding != null) {
- return encoding;
- }
- else {
- throw new IOException("Encoding for language " + language + " is not specified!");
- }
- }
-
public DocumentSample read() throws IOException {
int count = 0;
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java?rev=1177597&r1=1177596&r2=1177597&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java Fri Sep 30 11:10:28 2011
@@ -34,7 +34,7 @@ import opennlp.tools.util.ObjectStream;
public class LeipzigDocumentSampleStreamFactory implements ObjectStreamFactory<DocumentSample> {
interface Parameters {
- @ParameterDescription(valueName = "cat|de|dk|ee|en|fi|fr|it|jp|kr|nl|no|se|sorb|tr")
+ @ParameterDescription(valueName = "languageCode")
String getLang();
@ParameterDescription(valueName = "sampleData")