You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by sm...@apache.org on 2017/10/23 22:16:24 UTC

[opennlp] branch master updated: OPENNLP-1148: use StandardCharsets.UTF_8 for the second parameter of PlainTextByLineStream() in doc (#279)

This is an automated email from the ASF dual-hosted git repository.

smarthi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/master by this push:
     new d624a37  OPENNLP-1148: use StandardCharsets.UTF_8 for the second parameter of PlainTextByLineStream() in doc (#279)
d624a37 is described below

commit d624a37e72ce52ca5a97aa077fb2d1b8e7229df1
Author: Koji Sekiguchi <ko...@rondhuit.com>
AuthorDate: Tue Oct 24 07:16:23 2017 +0900

    OPENNLP-1148: use StandardCharsets.UTF_8 for the second parameter of PlainTextByLineStream() in doc (#279)
---
 opennlp-docs/src/docbkx/doccat.xml     | 4 ++--
 opennlp-docs/src/docbkx/langdetect.xml | 4 ++--
 opennlp-docs/src/docbkx/lemmatizer.xml | 4 ++--
 opennlp-docs/src/docbkx/namefinder.xml | 2 +-
 opennlp-docs/src/docbkx/parser.xml     | 4 ++--
 opennlp-docs/src/docbkx/tokenizer.xml  | 5 ++---
 6 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/opennlp-docs/src/docbkx/doccat.xml b/opennlp-docs/src/docbkx/doccat.xml
index c056732..3c456b9 100644
--- a/opennlp-docs/src/docbkx/doccat.xml
+++ b/opennlp-docs/src/docbkx/doccat.xml
@@ -131,7 +131,7 @@ InputStream dataIn = null;
 
 try (dataIn = new FileInputStream("en-sentiment.train")) {
   ObjectStream<String> lineStream =
-		new PlainTextByLineStream(dataIn, "UTF-8");
+		new PlainTextByLineStream(dataIn, StandardCharsets.UTF_8);
   ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(lineStream);
 
   model = DocumentCategorizerME.train("en", sampleStream);
@@ -153,4 +153,4 @@ try (OutputStream modelOut = new BufferedOutputStream(new FileOutputStream(model
 		</para>
 		</section>
 	</section>
-</chapter>
\ No newline at end of file
+</chapter>
diff --git a/opennlp-docs/src/docbkx/langdetect.xml b/opennlp-docs/src/docbkx/langdetect.xml
index 67412a4..a5ca608 100644
--- a/opennlp-docs/src/docbkx/langdetect.xml
+++ b/opennlp-docs/src/docbkx/langdetect.xml
@@ -205,7 +205,7 @@ $ tail -20000 < leipzig_shuf.txt > leipzig.eval
 InputStreamFactory inputStreamFactory = new MarkableFileInputStreamFactory(new File("corpus.txt"));
 
 ObjectStream<String> lineStream =
-  new PlainTextByLineStream(inputStreamFactory, "UTF-8");
+  new PlainTextByLineStream(inputStreamFactory, StandardCharsets.UTF_8);
 ObjectStream<LanguageSample> sampleStream = new LanguageDetectorSampleStream(lineStream);
 
 TrainingParameters params = ModelUtil.createDefaultTrainingParameters();
@@ -223,4 +223,4 @@ model.serialize(new File("langdetect.bin"));
 		</para>
 		</section>
 	</section>
-</chapter>
\ No newline at end of file
+</chapter>
diff --git a/opennlp-docs/src/docbkx/lemmatizer.xml b/opennlp-docs/src/docbkx/lemmatizer.xml
index 630b04d..60fba8f 100644
--- a/opennlp-docs/src/docbkx/lemmatizer.xml
+++ b/opennlp-docs/src/docbkx/lemmatizer.xml
@@ -302,7 +302,7 @@ InputStreamFactory inputStreamFactory = null;
     LemmaSampleStream lemmaStream = null;
     try {
       lineStream = new PlainTextByLineStream(
-      (inputStreamFactory), "UTF-8");
+      (inputStreamFactory), StandardCharsets.UTF_8);
       lemmaStream = new LemmaSampleStream(lineStream);
     } catch (IOException e) {
       CmdLineUtil.handleCreateObjectStreamError(e);
@@ -357,4 +357,4 @@ Accuracy: 0.9659110277825124]]>
 			 </screen>
 				</para>
 		</section>
-</chapter>
\ No newline at end of file
+</chapter>
diff --git a/opennlp-docs/src/docbkx/namefinder.xml b/opennlp-docs/src/docbkx/namefinder.xml
index 76261f7..cf12f48 100644
--- a/opennlp-docs/src/docbkx/namefinder.xml
+++ b/opennlp-docs/src/docbkx/namefinder.xml
@@ -268,7 +268,7 @@ $ opennlp TokenNameFinderTrainer -featuregen brown.xml -sequenceCodec BILOU -res
 			<programlisting language="java">
 				<![CDATA[
 ObjectStream<String> lineStream =
-		new PlainTextByLineStream(new FileInputStream("en-ner-person.train"), StandardCharsets.UTF8);
+		new PlainTextByLineStream(new FileInputStream("en-ner-person.train"), StandardCharsets.UTF_8);
 
 TokenNameFinderModel model;
 
diff --git a/opennlp-docs/src/docbkx/parser.xml b/opennlp-docs/src/docbkx/parser.xml
index 614293b..12de5fb 100644
--- a/opennlp-docs/src/docbkx/parser.xml
+++ b/opennlp-docs/src/docbkx/parser.xml
@@ -266,7 +266,7 @@ CmdLineUtil.checkOutputFile("parser model", modelOutFile);
 try {
   HeadRules rules = createHeadRules(params);
   InputStreamFactory inputStreamFactory = new MarkableFileInputStreamFactory(new File("parsing.train"));
-  ObjectStream<String> stringStream = new PlainTextByLineStream(inputStreamFactory, "UTF-8");
+  ObjectStream<String> stringStream = new PlainTextByLineStream(inputStreamFactory, StandardCharsets.UTF_8);
   ObjectStream<Parse> sampleStream = new ParseSample(stringStream);
 
   ParserType type = parseParserType(params.getParserType());
@@ -366,7 +366,7 @@ System.out.println(result.toString());]]>
 			<programlisting language="java">
 				<![CDATA[
 InputStreamFactory inputStreamFactory = new MarkableFileInputStreamFactory(new File("parsing.train"));
-ObjectStream<String> stringStream = new PlainTextByLineStream(inputStreamFactory, "UTF-8");
+ObjectStream<String> stringStream = new PlainTextByLineStream(inputStreamFactory, StandardCharsets.UTF_8);
 ObjectStream<Parse> sampleStream = new ParseSample(stringStream);
 ParserCrossValidator evaluator = new ParserCrossValidator("en", trainParameters, headRules, \
 parserType, listeners.toArray(new ParserEvaluationMonitor[listeners.size()])));
diff --git a/opennlp-docs/src/docbkx/tokenizer.xml b/opennlp-docs/src/docbkx/tokenizer.xml
index 6d54c3c..3fb4519 100644
--- a/opennlp-docs/src/docbkx/tokenizer.xml
+++ b/opennlp-docs/src/docbkx/tokenizer.xml
@@ -311,9 +311,8 @@ Path: en-token.bin]]>
                 The following sample code illustrates these steps:
                 <programlisting language="java">
                     <![CDATA[
-Charset charset = Charset.forName("UTF-8");
 ObjectStream<String> lineStream = new PlainTextByLineStream(new FileInputStream("en-sent.train"),
-    charset);
+    StandardCharsets.UTF_8);
 ObjectStream<TokenSample> sampleStream = new TokenSampleStream(lineStream);
 
 TokenizerModel model;
@@ -411,4 +410,4 @@ are very welcome. If you want to contribute please contact us on the mailing lis
 or comment on the jira issue <ulink url="https://issues.apache.org/jira/browse/OPENNLP-217">OPENNLP-217</ulink>.</para>
 		</section>
 	</section>
-</chapter>
\ No newline at end of file
+</chapter>

-- 
To stop receiving notification emails like this one, please contact
"commits@opennlp.apache.org" <co...@opennlp.apache.org>.