You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2014/02/04 18:10:12 UTC

svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/ main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/ main/java/opennlp/tools/cmdlin...

Author: markg
Date: Tue Feb  4 17:10:11 2014
New Revision: 1564379

URL: http://svn.apache.org/r1564379
Log:
OPENNLP-600
Changed to MockInputStreamFactory everywhere except where a reader was being used in the PlainTextByLineStream constructor

Added:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/MockInputStreamFactory.java
Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/InputStreamFactory.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerDetailedFMeasureListenerTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerEvaluatorTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADNameSampleStreamTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADPOSSampleStreamTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADSentenceSampleStreamTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java Tue Feb  4 17:10:11 2014
@@ -36,7 +36,7 @@ public class MarkableFileInputStream ext
   MarkableFileInputStream(File file) throws FileNotFoundException {
     in = new FileInputStream(file);
   }
-  
+
   @Override
   public synchronized void mark(int readlimit) {
     try {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java Tue Feb  4 17:10:11 2014
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.cmdline.chunker;
 
 import java.io.File;
@@ -30,6 +29,7 @@ import opennlp.tools.cmdline.CmdLineUtil
 import opennlp.tools.cmdline.PerformanceMonitor;
 import opennlp.tools.postag.POSSample;
 import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
@@ -38,7 +38,7 @@ public class ChunkerMETool extends Basic
   public String getShortDescription() {
     return "learnable chunker";
   }
-  
+
   public String getHelp() {
     return "Usage: " + CLI.CMD + " " + getName() + " model < sentences";
   }
@@ -51,13 +51,12 @@ public class ChunkerMETool extends Basic
 
       ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE);
 
-      ObjectStream<String> lineStream =
-        new PlainTextByLineStream(new InputStreamReader(System.in));
-
-      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
-      perfMon.start();
+      ObjectStream<String> lineStream = null;
+      PerformanceMonitor perfMon = null;
 
       try {
+        lineStream = new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
+        perfMon = new PerformanceMonitor(System.err, "sent");
         String line;
         while ((line = lineStream.read()) != null) {
 
@@ -71,15 +70,14 @@ public class ChunkerMETool extends Basic
           }
 
           String[] chunks = chunker.chunk(posSample.getSentence(),
-              posSample.getTags());
+                  posSample.getTags());
 
           System.out.println(new ChunkSample(posSample.getSentence(),
-              posSample.getTags(), chunks).nicePrint());
+                  posSample.getTags(), chunks).nicePrint());
 
           perfMon.incrementCounter();
         }
-      }
-      catch (IOException e) {
+      } catch (IOException e) {
         CmdLineUtil.handleStdinIoError(e);
       }
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java Tue Feb  4 17:10:11 2014
@@ -14,12 +14,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.cmdline.doccat;
 
 import java.io.File;
 import java.io.IOException;
-import java.io.InputStreamReader;
 
 import opennlp.tools.cmdline.BasicCmdLineTool;
 import opennlp.tools.cmdline.CLI;
@@ -32,19 +30,23 @@ import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.ParagraphStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.tokenize.WhitespaceTokenizer;
+import opennlp.tools.util.MockInputStreamFactory;
 
 public class DoccatTool extends BasicCmdLineTool {
 
+  @Override
   public String getShortDescription() {
     return "learnable document categorizer";
   }
-  
+
+  @Override
   public String getHelp() {
     return "Usage: " + CLI.CMD + " " + getName() + " model < documents";
   }
 
+  @Override
   public void run(String[] args) {
-    
+
     if (0 == args.length) {
       System.out.println(getHelp());
     } else {
@@ -53,13 +55,21 @@ public class DoccatTool extends BasicCmd
 
       DocumentCategorizerME doccat = new DocumentCategorizerME(model);
 
-      ObjectStream<String> documentStream = new ParagraphStream(
-          new PlainTextByLineStream(new InputStreamReader(System.in)));
+      //ObjectStream<String> documentStream = new ParagraphStream(
+            //  new PlainTextByLineStream(new InputStreamReader(System.in)));
+      /**
+       * moved initialization to the try block to catch new IOException
+       */
+      ObjectStream<String> documentStream;
+
+
 
       PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "doc");
       perfMon.start();
 
       try {
+        documentStream = new ParagraphStream(
+                new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8"));
         String document;
         while ((document = documentStream.read()) != null) {
           double prob[] = doccat.categorize(WhitespaceTokenizer.INSTANCE.tokenize(document));
@@ -70,8 +80,7 @@ public class DoccatTool extends BasicCmd
 
           perfMon.incrementCounter();
         }
-      }
-      catch (IOException e) {
+      } catch (IOException e) {
         CmdLineUtil.handleStdinIoError(e);
       }
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java Tue Feb  4 17:10:11 2014
@@ -40,10 +40,12 @@ public class DoccatTrainerTool
     super(DocumentSample.class, TrainerToolParams.class);
   }
 
+  @Override
   public String getShortDescription() {
     return "trainer for the learnable document categorizer";
   }
   
+  @Override
   public void run(String format, String[] args) {
     super.run(format, args);
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java Tue Feb  4 17:10:11 2014
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.cmdline.namefind;
 
 import java.io.File;
@@ -33,6 +32,7 @@ import opennlp.tools.namefind.NameSample
 import opennlp.tools.namefind.TokenNameFinder;
 import opennlp.tools.namefind.TokenNameFinderModel;
 import opennlp.tools.tokenize.WhitespaceTokenizer;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.Span;
@@ -42,17 +42,17 @@ public final class TokenNameFinderTool e
   public String getShortDescription() {
     return "learnable name finder";
   }
-  
+
   public String getHelp() {
     return "Usage: " + CLI.CMD + " " + getName() + " model1 model2 ... modelN < sentences";
   }
-  
+
   public void run(String[] args) {
-    
+
     if (args.length == 0) {
       System.out.println(getHelp());
     } else {
-    
+
       NameFinderME nameFinders[] = new NameFinderME[args.length];
 
       for (int i = 0; i < nameFinders.length; i++) {
@@ -60,15 +60,17 @@ public final class TokenNameFinderTool e
         nameFinders[i] = new NameFinderME(model);
       }
 
-      ObjectStream<String> untokenizedLineStream =
-          new PlainTextByLineStream(new InputStreamReader(System.in));
-
+//      ObjectStream<String> untokenizedLineStream =
+//          new PlainTextByLineStream(new InputStreamReader(System.in));
+      ObjectStream<String> untokenizedLineStream;
       PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
       perfMon.start();
 
       try {
+        untokenizedLineStream =
+                new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
         String line;
-        while((line = untokenizedLineStream.read()) != null) {
+        while ((line = untokenizedLineStream.read()) != null) {
           String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
 
           // A new line indicates a new document,
@@ -89,17 +91,16 @@ public final class TokenNameFinderTool e
           // Simple way to drop intersecting spans, otherwise the
           // NameSample is invalid
           Span reducedNames[] = NameFinderME.dropOverlappingSpans(
-              names.toArray(new Span[names.size()]));
+                  names.toArray(new Span[names.size()]));
 
           NameSample nameSample = new NameSample(whitespaceTokenizerLine,
-              reducedNames, false);
+                  reducedNames, false);
 
           System.out.println(nameSample.toString());
 
           perfMon.incrementCounter();
         }
-      }
-      catch (IOException e) {
+      } catch (IOException e) {
         CmdLineUtil.handleStdinIoError(e);
       }
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Tue Feb  4 17:10:11 2014
@@ -24,10 +24,8 @@ import java.util.HashMap;
 import java.util.Map;
 
 import opennlp.tools.cmdline.AbstractTrainerTool;
-import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
 import opennlp.tools.cmdline.CmdLineUtil;
 import opennlp.tools.cmdline.TerminateToolException;
-import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
 import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool.TrainerToolParams;
 import opennlp.tools.cmdline.params.TrainingToolParams;
 import opennlp.tools.namefind.NameSample;

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java Tue Feb  4 17:10:11 2014
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.cmdline.parser;
 
 import java.io.File;
@@ -34,6 +33,7 @@ import opennlp.tools.parser.AbstractBott
 import opennlp.tools.parser.Parse;
 import opennlp.tools.parser.ParserFactory;
 import opennlp.tools.parser.ParserModel;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.Span;
@@ -43,14 +43,13 @@ public final class ParserTool extends Ba
   public String getShortDescription() {
     return "performs full syntactic parsing";
   }
-  
+
   public String getHelp() {
-    return "Usage: " + CLI.CMD + " " + getName() + " [-bs n -ap n -k n] model < sentences \n" +
-        "-bs n: Use a beam size of n.\n" +
-        "-ap f: Advance outcomes in with at least f% of the probability mass.\n" + 
-        "-k n: Show the top n parses.  This will also display their log-probablities.";
+    return "Usage: " + CLI.CMD + " " + getName() + " [-bs n -ap n -k n] model < sentences \n"
+            + "-bs n: Use a beam size of n.\n"
+            + "-ap f: Advance outcomes in with at least f% of the probability mass.\n"
+            + "-k n: Show the top n parses.  This will also display their log-probablities.";
   }
-
   private static Pattern untokenizedParenPattern1 = Pattern.compile("([^ ])([({)}])");
   private static Pattern untokenizedParenPattern2 = Pattern.compile("([({)}])([^ ])");
 
@@ -68,70 +67,69 @@ public final class ParserTool extends Ba
     String text = sb.substring(0, sb.length() - 1);
     Parse p = new Parse(text, new Span(0, text.length()), AbstractBottomUpParser.INC_NODE, 0, 0);
     int start = 0;
-    int i=0;
-    for (Iterator<String> ti = tokens.iterator(); ti.hasNext();i++) {
+    int i = 0;
+    for (Iterator<String> ti = tokens.iterator(); ti.hasNext(); i++) {
       String tok = ti.next();
-      p.insert(new Parse(text, new Span(start, start + tok.length()), AbstractBottomUpParser.TOK_NODE, 0,i));
+      p.insert(new Parse(text, new Span(start, start + tok.length()), AbstractBottomUpParser.TOK_NODE, 0, i));
       start += tok.length() + 1;
     }
     Parse[] parses;
     if (numParses == 1) {
-      parses = new Parse[] { parser.parse(p)};
-    }
-    else {
-      parses = parser.parse(p,numParses);
+      parses = new Parse[]{parser.parse(p)};
+    } else {
+      parses = parser.parse(p, numParses);
     }
     return parses;
   }
-  
+
   public void run(String[] args) {
-    
+
     if (args.length < 1) {
       System.out.println(getHelp());
     } else {
-    
+
       ParserModel model = new ParserModelLoader().load(new File(args[args.length - 1]));
 
       Integer beamSize = CmdLineUtil.getIntParameter("-bs", args);
-      if (beamSize == null)
-          beamSize = AbstractBottomUpParser.defaultBeamSize;
+      if (beamSize == null) {
+        beamSize = AbstractBottomUpParser.defaultBeamSize;
+      }
 
       Integer numParses = CmdLineUtil.getIntParameter("-k", args);
       boolean showTopK;
       if (numParses == null) {
         numParses = 1;
         showTopK = false;
-      }
-      else {
+      } else {
         showTopK = true;
       }
 
       Double advancePercentage = CmdLineUtil.getDoubleParameter("-ap", args);
 
-      if (advancePercentage == null)
+      if (advancePercentage == null) {
         advancePercentage = AbstractBottomUpParser.defaultAdvancePercentage;
+      }
 
       opennlp.tools.parser.Parser parser =
-          ParserFactory.create(model, beamSize, advancePercentage);
+              ParserFactory.create(model, beamSize, advancePercentage);
 
-      ObjectStream<String> lineStream =
-        new PlainTextByLineStream(new InputStreamReader(System.in));
-
-      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
-      perfMon.start();
 
+      ObjectStream<String> lineStream = null;
+      PerformanceMonitor perfMon = null;
       try {
+        lineStream = new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
+        perfMon = new PerformanceMonitor(System.err, "sent");
+        perfMon.start();
         String line;
         while ((line = lineStream.read()) != null) {
           if (line.length() == 0) {
             System.out.println();
-          }
-          else {
+          } else {
             Parse[] parses = parseLine(line, parser, numParses);
 
-            for (int pi=0,pn=parses.length;pi<pn;pi++) {
+            for (int pi = 0, pn = parses.length; pi < pn; pi++) {
               if (showTopK) {
-                System.out.print(pi+" "+parses[pi].getProb()+" ");
+                System.out.print(pi + " " + parses[pi].getProb() + " ");
               }
 
               parses[pi].show();
@@ -140,8 +138,7 @@ public final class ParserTool extends Ba
             }
           }
         }
-      }
-      catch (IOException e) {
+      } catch (IOException e) {
         CmdLineUtil.handleStdinIoError(e);
       }
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java Tue Feb  4 17:10:11 2014
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.cmdline.postag;
 
 import java.io.File;
@@ -29,6 +28,7 @@ import opennlp.tools.postag.POSModel;
 import opennlp.tools.postag.POSSample;
 import opennlp.tools.postag.POSTaggerME;
 import opennlp.tools.tokenize.WhitespaceTokenizer;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
@@ -37,28 +37,28 @@ public final class POSTaggerTool extends
   public String getShortDescription() {
     return "learnable part of speech tagger";
   }
-  
+
   public String getHelp() {
     return "Usage: " + CLI.CMD + " " + getName() + " model < sentences";
   }
 
   public void run(String[] args) {
-    
+
     if (args.length != 1) {
       System.out.println(getHelp());
     } else {
-    
+
       POSModel model = new POSModelLoader().load(new File(args[0]));
 
       POSTaggerME tagger = new POSTaggerME(model);
 
-      ObjectStream<String> lineStream =
-        new PlainTextByLineStream(new InputStreamReader(System.in));
-
-      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
-      perfMon.start();
+      ObjectStream<String> lineStream = null;
+      PerformanceMonitor perfMon = null;
 
       try {
+        lineStream = new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
+        perfMon = new PerformanceMonitor(System.err, "sent");
+        perfMon.start();
         String line;
         while ((line = lineStream.read()) != null) {
 
@@ -70,8 +70,7 @@ public final class POSTaggerTool extends
 
           perfMon.incrementCounter();
         }
-      }
-      catch (IOException e) {
+      } catch (IOException e) {
         CmdLineUtil.handleStdinIoError(e);
       }
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java Tue Feb  4 17:10:11 2014
@@ -27,6 +27,7 @@ import opennlp.tools.cmdline.CmdLineUtil
 import opennlp.tools.cmdline.PerformanceMonitor;
 import opennlp.tools.sentdetect.SentenceDetectorME;
 import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.ParagraphStream;
 import opennlp.tools.util.PlainTextByLineStream;
@@ -59,13 +60,12 @@ public final class SentenceDetectorTool 
 
       SentenceDetectorME sdetector = new SentenceDetectorME(model);
 
-      ObjectStream<String> paraStream =
-        new ParagraphStream(new PlainTextByLineStream(new InputStreamReader(System.in)));
-
-      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
-      perfMon.start();
+       ObjectStream<String> paraStream = null;
+      PerformanceMonitor perfMon = null;
 
       try {
+        paraStream = new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
+        perfMon = new PerformanceMonitor(System.err, "sent");
         String para;
         while ((para = paraStream.read()) != null) {
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java Tue Feb  4 17:10:11 2014
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.cmdline.tokenizer;
 
 import java.io.IOException;
@@ -25,39 +24,43 @@ import opennlp.tools.cmdline.Performance
 import opennlp.tools.tokenize.Tokenizer;
 import opennlp.tools.tokenize.TokenizerStream;
 import opennlp.tools.tokenize.WhitespaceTokenStream;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
 final class CommandLineTokenizer {
 
   private final Tokenizer tokenizer;
-  
+
   CommandLineTokenizer(Tokenizer tokenizer) {
     this.tokenizer = tokenizer;
   }
-  
+
   void process() {
-    
-    ObjectStream<String> untokenizedLineStream =
-        new PlainTextByLineStream(new InputStreamReader(System.in));
-    
-    ObjectStream<String> tokenizedLineStream = new WhitespaceTokenStream(
-        new TokenizerStream(tokenizer, untokenizedLineStream));
-    
-    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
-    perfMon.start();
-    
+    ObjectStream<String> untokenizedLineStream = null;
+
+    ObjectStream<String> tokenizedLineStream = null;
+    PerformanceMonitor perfMon = null;
     try {
+      untokenizedLineStream =
+              new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
+
+      tokenizedLineStream = new WhitespaceTokenStream(
+              new TokenizerStream(tokenizer, untokenizedLineStream));
+
+      perfMon = new PerformanceMonitor(System.err, "sent");
+      perfMon.start();
+
+
       String tokenizedLine;
       while ((tokenizedLine = tokenizedLineStream.read()) != null) {
         System.out.println(tokenizedLine);
         perfMon.incrementCounter();
       }
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       CmdLineUtil.handleStdinIoError(e);
     }
-    
+
     perfMon.stopAndPrintFinalResult();
   }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java Tue Feb  4 17:10:11 2014
@@ -28,6 +28,7 @@ import opennlp.tools.cmdline.Performance
 import opennlp.tools.tokenize.Detokenizer;
 import opennlp.tools.tokenize.DictionaryDetokenizer;
 import opennlp.tools.tokenize.WhitespaceTokenizer;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
@@ -43,17 +44,17 @@ public final class DictionaryDetokenizer
     if (args.length != 1) {
       System.out.println(getHelp());
     } else {
-    
+          try {
       Detokenizer detokenizer = new DictionaryDetokenizer(
           new DetokenizationDictionaryLoader().load(new File(args[0])));
 
       ObjectStream<String> tokenizedLineStream =
-        new PlainTextByLineStream(new InputStreamReader(System.in));
+        new PlainTextByLineStream(new MockInputStreamFactory(System.in),"UTF-8");
 
       PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
       perfMon.start();
 
-      try {
+
         String tokenizedLine;
         while ((tokenizedLine = tokenizedLineStream.read()) != null) {
 
@@ -64,12 +65,13 @@ public final class DictionaryDetokenizer
 
           perfMon.incrementCounter();
         }
+              perfMon.stopAndPrintFinalResult();
       }
       catch (IOException e) {
         CmdLineUtil.handleStdinIoError(e);
       }
 
-      perfMon.stopAndPrintFinalResult();
+
     }
   }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java Tue Feb  4 17:10:11 2014
@@ -37,6 +37,7 @@ public class BagOfWordsFeatureGenerator 
     this.useOnlyAllLetterTokens = useOnlyAllLetterTokens;
   }
   
+  @Override
   public Collection<String> extractFeatures(String[] text) {
 
     Collection<String> bagOfWords = new ArrayList<String>(text.length);

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java Tue Feb  4 17:10:11 2014
@@ -23,11 +23,8 @@ import java.io.ObjectStreamException;
 import java.util.HashMap;
 import java.util.Map;
 
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.ml.model.AbstractModel;
 import opennlp.tools.ml.model.MaxentModel;
 import opennlp.tools.ml.model.TrainUtil;
-import opennlp.tools.ml.model.TwoPassDataIndexer;
 import opennlp.tools.tokenize.SimpleTokenizer;
 import opennlp.tools.tokenize.Tokenizer;
 import opennlp.tools.util.ObjectStream;

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java Tue Feb  4 17:10:11 2014
@@ -25,6 +25,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.Span;
@@ -54,9 +55,9 @@ public class BioNLP2004NameSampleStream 
   
   public BioNLP2004NameSampleStream(InputStream in, int types) {
     try {
-      this.lineStream = new PlainTextByLineStream(in, "UTF-8");
+      this.lineStream = new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8");
       System.setOut(new PrintStream(System.out, true, "UTF-8"));
-    } catch (UnsupportedEncodingException e) {
+    } catch (IOException e) {
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);
     }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java Tue Feb  4 17:10:11 2014
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.formats;
 
 import opennlp.tools.chunker.ChunkSample;
@@ -27,6 +26,10 @@ import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
 import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.tools.util.MockInputStreamFactory;
 
 /**
  * Factory producing OpenNLP {@link ChunkSampleStream}s.
@@ -38,7 +41,7 @@ public class ChunkerSampleStreamFactory 
 
   public static void registerFactory() {
     StreamFactoryRegistry.registerFactory(ChunkSample.class,
-        StreamFactoryRegistry.DEFAULT_FORMAT, new ChunkerSampleStreamFactory(Parameters.class));
+            StreamFactoryRegistry.DEFAULT_FORMAT, new ChunkerSampleStreamFactory(Parameters.class));
   }
 
   protected <P> ChunkerSampleStreamFactory(Class<P> params) {
@@ -50,9 +53,13 @@ public class ChunkerSampleStreamFactory 
 
     CmdLineUtil.checkInputFile("Data", params.getData());
     FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
-
-    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn
-        .getChannel(), params.getEncoding());
+    ObjectStream<String> lineStream = null;
+    try {
+      lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn), params.getEncoding());
+
+    } catch (IOException ex) {
+      Logger.getLogger(ChunkerSampleStreamFactory.class.getName()).log(Level.SEVERE, null, ex);
+    }
 
     return new ChunkSampleStream(lineStream);
   }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java Tue Feb  4 17:10:11 2014
@@ -26,6 +26,7 @@ import java.util.List;
 
 import opennlp.tools.namefind.NameSample;
 import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.Span;
@@ -80,9 +81,9 @@ public class Conll02NameSampleStream imp
     
     this.lang = lang;
     try {
-      this.lineStream = new PlainTextByLineStream(in, "UTF-8");
+      this.lineStream = new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8");
       System.setOut(new PrintStream(System.out, true, "UTF-8"));
-    } catch (UnsupportedEncodingException e) {
+    } catch (IOException e) {
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);
     } 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java Tue Feb  4 17:10:11 2014
@@ -25,6 +25,7 @@ import java.util.ArrayList;
 import java.util.List;
 
 import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.Span;
@@ -67,9 +68,9 @@ public class Conll03NameSampleStream imp
 
     this.lang = lang;
     try {
-      this.lineStream = new PlainTextByLineStream(in, "UTF-8");
+      this.lineStream = new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8");
       System.setOut(new PrintStream(System.out, true, "UTF-8"));
-    } catch (UnsupportedEncodingException e) {
+    } catch (IOException e) {
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);
     }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java Tue Feb  4 17:10:11 2014
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.formats;
 
 import opennlp.tools.cmdline.ArgumentParser;
@@ -27,6 +26,10 @@ import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
 import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.tools.util.MockInputStreamFactory;
 
 /**
  * Factory producing OpenNLP {@link DocumentSampleStream}s.
@@ -38,7 +41,7 @@ public class DocumentSampleStreamFactory
 
   public static void registerFactory() {
     StreamFactoryRegistry.registerFactory(DocumentSample.class,
-        StreamFactoryRegistry.DEFAULT_FORMAT, new DocumentSampleStreamFactory(Parameters.class));
+            StreamFactoryRegistry.DEFAULT_FORMAT, new DocumentSampleStreamFactory(Parameters.class));
   }
 
   protected <P> DocumentSampleStreamFactory(Class<P> params) {
@@ -50,9 +53,16 @@ public class DocumentSampleStreamFactory
 
     CmdLineUtil.checkInputFile("Data", params.getData());
     FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
-
-    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
-        params.getEncoding());
+    ObjectStream<String> lineStream=null;
+    try {
+      lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
+              params.getEncoding());
+      //        params.getEncoding());
+      //    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
+      //        params.getEncoding());
+    } catch (IOException ex) {
+        throw new RuntimeException(ex);
+    }
 
     return new DocumentSampleStream(lineStream);
   }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java Tue Feb  4 17:10:11 2014
@@ -26,6 +26,7 @@ import java.util.List;
 
 import opennlp.tools.namefind.NameSample;
 import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.Span;
@@ -87,9 +88,9 @@ public class EvalitaNameSampleStream imp
 
     this.lang = lang;
     try {
-      this.lineStream = new PlainTextByLineStream(in, "UTF-8");
+      this.lineStream = new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8");
       System.setOut(new PrintStream(System.out, true, "UTF-8"));
-    } catch (UnsupportedEncodingException e) {
+    } catch (IOException e) {
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);
     }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java Tue Feb  4 17:10:11 2014
@@ -24,6 +24,7 @@ import java.io.PrintStream;
 import opennlp.tools.doccat.DocumentSample;
 import opennlp.tools.tokenize.SimpleTokenizer;
 import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.PlainTextByLineStream;
 
 /**
@@ -52,7 +53,7 @@ public class LeipzigDoccatSampleStream e
    */
   LeipzigDoccatSampleStream(String language, int sentencesPerDocument, 
       InputStream in) throws IOException {
-    super(new PlainTextByLineStream(in, "UTF-8"));
+    super(new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8"));
     System.setOut(new PrintStream(System.out, true, "UTF-8"));
     this.language = language;
     this.sentencesPerDocument = sentencesPerDocument;

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java Tue Feb  4 17:10:11 2014
@@ -12,13 +12,15 @@
  *  limitations under the License.
  *  under the License.
  */
-
 package opennlp.tools.formats;
 
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.Charset;
 import java.util.Locale;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.tools.util.MockInputStreamFactory;
 
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
@@ -27,10 +29,10 @@ import opennlp.tools.util.StringUtil;
 
 /**
  * This class helps to read the US Census data from the files to build a
- * StringList for each dictionary entry in the name-finder dictionary.
- * The entries in the source file are as follows:
+ * StringList for each dictionary entry in the name-finder dictionary. The
+ * entries in the source file are as follows:
  * <p>
- *      SMITH          1.006  1.006      1
+ * SMITH 1.006 1.006 1
  * <p>
  * <ul>
  * <li>The first field is the name (in ALL CAPS).
@@ -45,14 +47,14 @@ public class NameFinderCensus90NameStrea
 
   private final Locale locale;
   private final Charset encoding;
-  private final ObjectStream<String> lineStream;
+  private ObjectStream<String> lineStream;
 
   /**
    * This constructor takes an ObjectStream and initializes the class to handle
    * the stream.
    *
-   * @param lineStream  an <code>ObjectSteam<String></code> that represents the
-   *                    input file to be attached to this class.
+   * @param lineStream an <code>ObjectSteam<String></code> that represents the
+   *                   input file to be attached to this class.
    */
   public NameFinderCensus90NameStream(ObjectStream<String> lineStream) {
     this.locale = new Locale("en");   // locale is English
@@ -62,24 +64,32 @@ public class NameFinderCensus90NameStrea
   }
 
   /**
-   * This constructor takes an <code>InputStream</code> and a <code>Charset</code>
-   * and opens an associated stream object with the specified encoding specified.
+   * This constructor takes an
+   * <code>InputStream</code> and a
+   * <code>Charset</code> and opens an associated stream object with the
+   * specified encoding specified.
    *
-   * @param in  an <code>InputStream</code> for the input file.
-   * @param encoding  the <code>Charset</code> to apply to the input stream.
+   * @param in       an <code>InputStream</code> for the input file.
+   * @param encoding the <code>Charset</code> to apply to the input stream.
    */
   public NameFinderCensus90NameStream(InputStream in, Charset encoding) {
     this.locale = new Locale("en");   // locale is English
     this.encoding = encoding;
-    this.lineStream = new PlainTextByLineStream(in, this.encoding);
+
+    try {
+      this.lineStream = new PlainTextByLineStream(new MockInputStreamFactory(in), this.encoding);
+    } catch (IOException ex) {
+
+      throw new RuntimeException(ex);
+    }
   }
 
   public StringList read() throws IOException {
     String line = lineStream.read();
     StringList name = null;
 
-    if ((line != null) &&
-        (!StringUtil.isEmpty(line))) {
+    if ((line != null)
+            && (!StringUtil.isEmpty(line))) {
       String name2;
       // find the location of the name separator in the line of data.
       int pos = line.indexOf(' ');
@@ -87,15 +97,15 @@ public class NameFinderCensus90NameStrea
         String parsed = line.substring(0, pos);
         // the data is in ALL CAPS ... so the easiest way is to convert
         // back to standard mixed case.
-        if ((parsed.length() > 2) &&
-            (parsed.startsWith("MC"))) {
-          name2 = parsed.substring(0,1).toUpperCase(locale) +
-                  parsed.substring(1,2).toLowerCase(locale) +
-                  parsed.substring(2,3).toUpperCase(locale) +
-                  parsed.substring(3).toLowerCase(locale);
+        if ((parsed.length() > 2)
+                && (parsed.startsWith("MC"))) {
+          name2 = parsed.substring(0, 1).toUpperCase(locale)
+                  + parsed.substring(1, 2).toLowerCase(locale)
+                  + parsed.substring(2, 3).toUpperCase(locale)
+                  + parsed.substring(3).toLowerCase(locale);
         } else {
-          name2 = parsed.substring(0,1).toUpperCase(locale) +
-                  parsed.substring(1).toLowerCase(locale);
+          name2 = parsed.substring(0, 1).toUpperCase(locale)
+                  + parsed.substring(1).toLowerCase(locale);
         }
         name = new StringList(new String[]{name2});
       }
@@ -111,5 +121,4 @@ public class NameFinderCensus90NameStrea
   public void close() throws IOException {
     lineStream.close();
   }
-
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java Tue Feb  4 17:10:11 2014
@@ -14,10 +14,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.formats;
 
 import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
 import opennlp.tools.cmdline.ArgumentParser;
 import opennlp.tools.cmdline.CmdLineUtil;
@@ -25,6 +27,7 @@ import opennlp.tools.cmdline.StreamFacto
 import opennlp.tools.cmdline.params.BasicFormatParams;
 import opennlp.tools.namefind.NameSample;
 import opennlp.tools.namefind.NameSampleDataStream;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
@@ -38,7 +41,7 @@ public class NameSampleDataStreamFactory
 
   public static void registerFactory() {
     StreamFactoryRegistry.registerFactory(NameSample.class,
-        StreamFactoryRegistry.DEFAULT_FORMAT, new NameSampleDataStreamFactory(Parameters.class));
+            StreamFactoryRegistry.DEFAULT_FORMAT, new NameSampleDataStreamFactory(Parameters.class));
   }
 
   protected <P> NameSampleDataStreamFactory(Class<P> params) {
@@ -49,11 +52,16 @@ public class NameSampleDataStreamFactory
     Parameters params = ArgumentParser.parse(args, Parameters.class);
 
     CmdLineUtil.checkInputFile("Data", params.getData());
-    
+
     FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
 
-    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
-        params.getEncoding());
+    ObjectStream<String> lineStream = null;
+    try {
+      lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
+              params.getEncoding());
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
 
     return new NameSampleDataStream(lineStream);
   }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java Tue Feb  4 17:10:11 2014
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.formats;
 
 import opennlp.tools.cmdline.ArgumentParser;
@@ -27,6 +26,10 @@ import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
 import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.tools.util.MockInputStreamFactory;
 
 /**
  * Factory producing OpenNLP {@link ParseSampleStream}s.
@@ -38,7 +41,7 @@ public class ParseSampleStreamFactory ex
 
   public static void registerFactory() {
     StreamFactoryRegistry.registerFactory(Parse.class,
-        StreamFactoryRegistry.DEFAULT_FORMAT, new ParseSampleStreamFactory(Parameters.class));
+            StreamFactoryRegistry.DEFAULT_FORMAT, new ParseSampleStreamFactory(Parameters.class));
   }
 
   protected <P> ParseSampleStreamFactory(Class<P> params) {
@@ -51,8 +54,12 @@ public class ParseSampleStreamFactory ex
     CmdLineUtil.checkInputFile("Data", params.getData());
     FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
 
-    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn
-        .getChannel(), params.getEncoding());
+    ObjectStream<String> lineStream = null;
+    try {
+      lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn), params.getEncoding());
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
 
     return new ParseSampleStream(lineStream);
   }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java Tue Feb  4 17:10:11 2014
@@ -27,6 +27,10 @@ import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
 import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.tools.util.MockInputStreamFactory;
 
 /**
  * Factory producing OpenNLP {@link SentenceSampleStream}s.
@@ -51,8 +55,13 @@ public class SentenceSampleStreamFactory
     CmdLineUtil.checkInputFile("Data", params.getData());
     FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
 
-    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
-        params.getEncoding());
+    ObjectStream<String> lineStream=null;
+    try {
+      lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
+params.getEncoding());
+    } catch (IOException ex) {
+      Logger.getLogger(SentenceSampleStreamFactory.class.getName()).log(Level.SEVERE, null, ex);
+    }
 
     return new SentenceSampleStream(lineStream);
   }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java Tue Feb  4 17:10:11 2014
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.formats;
 
 import opennlp.tools.cmdline.ArgumentParser;
@@ -27,6 +26,10 @@ import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
 import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.tools.util.MockInputStreamFactory;
 
 /**
  * Factory producing OpenNLP {@link TokenSampleStream}s.
@@ -38,7 +41,7 @@ public class TokenSampleStreamFactory ex
 
   public static void registerFactory() {
     StreamFactoryRegistry.registerFactory(TokenSample.class,
-        StreamFactoryRegistry.DEFAULT_FORMAT, new TokenSampleStreamFactory(Parameters.class));
+            StreamFactoryRegistry.DEFAULT_FORMAT, new TokenSampleStreamFactory(Parameters.class));
   }
 
   protected <P> TokenSampleStreamFactory(Class<P> params) {
@@ -51,8 +54,13 @@ public class TokenSampleStreamFactory ex
     CmdLineUtil.checkInputFile("Data", params.getData());
     FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
 
-    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
-        params.getEncoding());
+    ObjectStream<String> lineStream = null;
+    try {
+      lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
+              params.getEncoding());
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
 
     return new TokenSampleStream(lineStream);
   }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java Tue Feb  4 17:10:11 2014
@@ -14,10 +14,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package opennlp.tools.formats;
 
 import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
 import opennlp.tools.cmdline.ArgumentParser;
 import opennlp.tools.cmdline.CmdLineUtil;
@@ -25,6 +27,7 @@ import opennlp.tools.cmdline.StreamFacto
 import opennlp.tools.cmdline.params.BasicFormatParams;
 import opennlp.tools.postag.POSSample;
 import opennlp.tools.postag.WordTagSampleStream;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
@@ -38,9 +41,9 @@ public class WordTagSampleStreamFactory 
 
   public static void registerFactory() {
     StreamFactoryRegistry.registerFactory(POSSample.class,
-        StreamFactoryRegistry.DEFAULT_FORMAT, new WordTagSampleStreamFactory(Parameters.class));
+            StreamFactoryRegistry.DEFAULT_FORMAT, new WordTagSampleStreamFactory(Parameters.class));
   }
-  
+
   protected <P> WordTagSampleStreamFactory(Class<P> params) {
     super(params);
   }
@@ -51,8 +54,13 @@ public class WordTagSampleStreamFactory 
     CmdLineUtil.checkInputFile("Data", params.getData());
     FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
 
-    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
-        params.getEncoding());
+    ObjectStream<String> lineStream = null;
+    try {
+      lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
+              params.getEncoding());
+    } catch (IOException ex) {
+      throw new RuntimeException(ex);
+    }
 
     return new WordTagSampleStream(lineStream);
   }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java Tue Feb  4 17:10:11 2014
@@ -29,6 +29,7 @@ import opennlp.tools.formats.ad.ADSenten
 import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Node;
 import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.TreeElement;
 import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.StringUtil;
@@ -91,8 +92,8 @@ public class ADChunkSampleStream impleme
 
 		try {
 			this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(
-					in, charsetName));
-		} catch (UnsupportedEncodingException e) {
+					new MockInputStreamFactory(in), charsetName));
+		} catch (IOException e) {
 			// UTF-8 is available on all JVMs, will never happen
 			throw new IllegalStateException(e);
 		}

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java Tue Feb  4 17:10:11 2014
@@ -19,7 +19,10 @@ package opennlp.tools.formats.ad;
 
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.IOException;
 import java.nio.charset.Charset;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
 import opennlp.tools.chunker.ChunkSample;
 import opennlp.tools.cmdline.ArgumentParser;
@@ -28,6 +31,7 @@ import opennlp.tools.cmdline.ArgumentPar
 import opennlp.tools.cmdline.CmdLineUtil;
 import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.formats.LanguageSampleStreamFactory;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
@@ -78,8 +82,13 @@ public class ADChunkSampleStreamFactory 
 
     FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
     
-    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
-        params.getEncoding());
+    ObjectStream<String> lineStream=null;
+    try {
+      lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
+params.getEncoding());
+    } catch (IOException ex) {
+      Logger.getLogger(ADChunkSampleStreamFactory.class.getName()).log(Level.SEVERE, null, ex);
+    }
 
     ADChunkSampleStream sampleStream = new ADChunkSampleStream(lineStream);
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java Tue Feb  4 17:10:11 2014
@@ -35,6 +35,7 @@ import opennlp.tools.formats.ad.ADSenten
 import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Node;
 import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.TreeElement;
 import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.Span;
@@ -191,9 +192,9 @@ public class ADNameSampleStream implemen
 
     try {
       this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(
-          in, charsetName));
+          new MockInputStreamFactory(in), charsetName));
       this.splitHyphenatedTokens = splitHyphenatedTokens;
-    } catch (UnsupportedEncodingException e) {
+    } catch (IOException e) {
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);
     }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java Tue Feb  4 17:10:11 2014
@@ -19,7 +19,10 @@ package opennlp.tools.formats.ad;
 
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.IOException;
 import java.nio.charset.Charset;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
 import opennlp.tools.cmdline.ArgumentParser;
 import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
@@ -28,6 +31,7 @@ import opennlp.tools.cmdline.CmdLineUtil
 import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.formats.LanguageSampleStreamFactory;
 import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
@@ -74,8 +78,12 @@ public class ADNameSampleStreamFactory e
 
     FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
 
-    ObjectStream<String> lineStream = new PlainTextByLineStream(
-        sampleDataIn.getChannel(), params.getEncoding());
+    ObjectStream<String> lineStream=null;
+    try {
+      lineStream = new PlainTextByLineStream(
+new MockInputStreamFactory(sampleDataIn), params.getEncoding());
+    } catch (IOException ex) {
+throw new RuntimeException(ex)  ;  }
 
     return new ADNameSampleStream(lineStream, params.getSplitHyphenatedTokens());
   }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java Tue Feb  4 17:10:11 2014
@@ -29,6 +29,7 @@ import opennlp.tools.formats.ad.ADSenten
 import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Node;
 import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.TreeElement;
 import opennlp.tools.postag.POSSample;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
@@ -81,10 +82,10 @@ public class ADPOSSampleStream implement
 
     try {
       this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(
-          in, charsetName));
+          new MockInputStreamFactory(in), charsetName));
       this.expandME = expandME;
       this.isIncludeFeatures = includeFeatures;
-    } catch (UnsupportedEncodingException e) {
+    } catch (IOException e) {
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);
     }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java Tue Feb  4 17:10:11 2014
@@ -19,7 +19,10 @@ package opennlp.tools.formats.ad;
 
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.IOException;
 import java.nio.charset.Charset;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
 import opennlp.tools.cmdline.ArgumentParser;
 import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
@@ -28,6 +31,7 @@ import opennlp.tools.cmdline.CmdLineUtil
 import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.formats.LanguageSampleStreamFactory;
 import opennlp.tools.postag.POSSample;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
@@ -73,8 +77,13 @@ public class ADPOSSampleStreamFactory ex
 
     FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
 
-    ObjectStream<String> lineStream = new PlainTextByLineStream(
-        sampleDataIn.getChannel(), params.getEncoding());
+    ObjectStream<String> lineStream=null;
+    try {
+      lineStream = new PlainTextByLineStream(
+new MockInputStreamFactory(sampleDataIn), params.getEncoding());
+    } catch (IOException ex) {
+      Logger.getLogger(ADPOSSampleStreamFactory.class.getName()).log(Level.SEVERE, null, ex);
+    }
 
     ADPOSSampleStream sentenceStream = new ADPOSSampleStream(lineStream,
         params.getExpandME(), params.getIncludeFeatures());

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java Tue Feb  4 17:10:11 2014
@@ -29,6 +29,7 @@ import java.util.regex.Pattern;
 import opennlp.tools.formats.ad.ADSentenceStream.Sentence;
 import opennlp.tools.sentdetect.SentenceSample;
 import opennlp.tools.sentdetect.lang.Factory;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.Span;
@@ -81,8 +82,8 @@ public class ADSentenceSampleStream impl
       boolean includeHeadlines) {
     try {
       this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(
-          in, charsetName));
-    } catch (UnsupportedEncodingException e) {
+          new MockInputStreamFactory(in), charsetName));
+    } catch (IOException e) {
       // UTF-8 is available on all JVMs, will never happen
       throw new IllegalStateException(e);
     }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java Tue Feb  4 17:10:11 2014
@@ -19,7 +19,10 @@ package opennlp.tools.formats.ad;
 
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.IOException;
 import java.nio.charset.Charset;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
 import opennlp.tools.cmdline.ArgumentParser;
 import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
@@ -28,6 +31,7 @@ import opennlp.tools.cmdline.CmdLineUtil
 import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.formats.LanguageSampleStreamFactory;
 import opennlp.tools.sentdetect.SentenceSample;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
@@ -71,8 +75,13 @@ public class ADSentenceSampleStreamFacto
 
     FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
 
-    ObjectStream<String> lineStream = new PlainTextByLineStream(
-        sampleDataIn.getChannel(), params.getEncoding());
+    ObjectStream<String> lineStream=null;
+    try {
+      lineStream = new PlainTextByLineStream(
+new MockInputStreamFactory(sampleDataIn), params.getEncoding());
+    } catch (IOException ex) {
+      Logger.getLogger(ADSentenceSampleStreamFactory.class.getName()).log(Level.SEVERE, null, ex);
+    }
 
     ADSentenceSampleStream sentenceStream = new ADSentenceSampleStream(
         lineStream, includeTitle);

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java Tue Feb  4 17:10:11 2014
@@ -24,6 +24,7 @@ import java.util.Map;
 
 import opennlp.tools.ml.model.Event;
 import opennlp.tools.ml.model.EventStream;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.Span;
@@ -151,7 +152,7 @@ public class NameFinderEventStream exten
       System.exit(1);
     }
     EventStream es = new NameFinderEventStream(new NameSampleDataStream(
-        new PlainTextByLineStream(new java.io.InputStreamReader(System.in))));
+        new PlainTextByLineStream(new MockInputStreamFactory(System.in),"UTF-8")));
     while (es.hasNext()) {
       System.out.println(es.next());
     }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java Tue Feb  4 17:10:11 2014
@@ -125,8 +125,9 @@ public final class RegexNameFinder imple
   }
 
   /**
-   * NEW. This method removes the need for tokenization, but returns the Span
-   * with character indices, rather than word.
+   * NEW. This method removes the need for tokenization, but returns the
+   * character spans rather than word spans. Span.spansToStrings will not work
+   * properly on this output.
    *
    * @param text
    * @return

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java Tue Feb  4 17:10:11 2014
@@ -23,7 +23,7 @@ import opennlp.tools.util.Span;
 /**
  *
  * Returns RegexNameFinders based on multiple methods: 1. A selection of
- * defaults 2. A configuration and a selection of defaults 3.
+ * defaults 2. A configuration and a selection of defaults
  */
 public class RegexNameFinderFactory {
 
@@ -41,7 +41,10 @@ public class RegexNameFinderFactory {
     if (config == null) {
       throw new IllegalArgumentException("config Map cannot be null");
     }
-    Map<String, Pattern[]> defaultsToMap = defaultsToMap(defaults);
+    Map<String, Pattern[]> defaultsToMap = new HashMap<>();
+    if (defaults != null) {
+      defaultsToMap = defaultsToMap(defaults);
+    }
     defaultsToMap.putAll(config);
     return new RegexNameFinder(defaultsToMap);
   }
@@ -109,8 +112,8 @@ public class RegexNameFinderFactory {
       @Override
       public Map<String, Pattern[]> getRegexMap() {
         Pattern[] p = new Pattern[1];
-       // p[0] = Pattern.compile("([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})|([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})|([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})", Pattern.CASE_INSENSITIVE);
-        p[0]=Pattern.compile("((\\(\\d{3}\\) ?)|(\\d{3}-))?\\d{3}-\\d{4}");
+        // p[0] = Pattern.compile("([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})|([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})|([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})", Pattern.CASE_INSENSITIVE);
+        p[0] = Pattern.compile("((\\(\\d{3}\\) ?)|(\\d{3}-))?\\d{3}-\\d{4}");
         Map<String, Pattern[]> regexMap = new HashMap<>();
         regexMap.put(getType(), p);
         return regexMap;

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java Tue Feb  4 17:10:11 2014
@@ -29,6 +29,7 @@ import opennlp.tools.parser.Parse;
 import opennlp.tools.parser.ParseSampleStream;
 import opennlp.tools.parser.ParserEventTypeEnum;
 import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 
@@ -204,7 +205,7 @@ public class ParserEventStream extends A
     if (fun) {
       Parse.useFunctionTags(true);
     }
-    opennlp.tools.ml.model.EventStream es = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.InputStreamReader(System.in))), rules, etype, dict);
+    opennlp.tools.ml.model.EventStream es = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new MockInputStreamFactory(System.in),"UTF-8")), rules, etype, dict);
     while (es.hasNext()) {
       System.out.println(es.next());
     }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java Tue Feb  4 17:10:11 2014
@@ -36,6 +36,7 @@ import opennlp.tools.parser.Parse;
 import opennlp.tools.parser.ParseSampleStream;
 import opennlp.tools.parser.ParserEventTypeEnum;
 import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.MockInputStreamFactory;
 import opennlp.tools.util.ObjectStream;
 import opennlp.tools.util.PlainTextByLineStream;
 import opennlp.tools.util.Span;
@@ -379,7 +380,7 @@ public class ParserEventStream extends A
     if (fun) {
       Parse.useFunctionTags(true);
     }
-    opennlp.tools.ml.model.EventStream es = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.InputStreamReader(System.in))), rules, etype, dict);
+    opennlp.tools.ml.model.EventStream es = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new MockInputStreamFactory(System.in),"UTF-8")), rules, etype, dict);
     while (es.hasNext()) {
       Event e = es.next();
       if (model != null) {



Re: svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/ main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/ main/java/opennlp/tools/cmdlin...

Posted by Jörn Kottmann <ko...@gmail.com>.
Please don't change files that don't have to be changed for a given 
issue.
Changes like adding Override annotations should be done in a separate 
Jira issue.

Jörn

On 02/04/2014 06:10 PM, markg@apache.org wrote:
> Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
> URL:http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
> ==============================================================================
> --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java (original)
> +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java Tue Feb  4 17:10:11 2014
> @@ -40,10 +40,12 @@ public class DoccatTrainerTool
>       super(DocumentSample.class, TrainerToolParams.class);
>     }
>   
> +  @Override
>     public String getShortDescription() {
>       return "trainer for the learnable document categorizer";
>     }
>     
> +  @Override
>     public void run(String format, String[] args) {
>       super.run(format, args);
>   


Re: svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/ main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/ main/java/opennlp/tools/cmdlin...

Posted by Jörn Kottmann <ko...@gmail.com>.
On 02/06/2014 01:28 PM, Jörn Kottmann wrote:
> On 02/04/2014 06:10 PM, markg@apache.org wrote:
>> Modified: 
>> opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
>> URL:http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff 
>>
>> ============================================================================== 
>>
>> --- 
>> opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java 
>> (original)
>> +++ 
>> opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java 
>> Tue Feb  4 17:10:11 2014
>> @@ -14,7 +14,6 @@
>>    * See the License for the specific language governing permissions and
>>    * limitations under the License.
>>    */
>
> <SNIP>
>>   -      ObjectStream<String> lineStream =
>> -        new PlainTextByLineStream(new InputStreamReader(System.in));
>> -
>> -      PerformanceMonitor perfMon = new 
>> PerformanceMonitor(System.err, "sent");
>> -      perfMon.start();
>> +      ObjectStream<String> lineStream = null;
>> +      PerformanceMonitor perfMon = null;
>>           try {
>> +        lineStream = new PlainTextByLineStream(new 
>> MockInputStreamFactory(System.in), "UTF-8");
>> +        perfMon = new PerformanceMonitor(System.err, "sent");
>> +        perfMon.start();
>
>
> What is the motivation to move the start of the performance monitoring 
> down? Before it measured opening
> the input stream as well, now it won't.
>
> If you think it should not include opening the stream I suggest we do 
> this change in a separate jira issue and handle it
> identical in all places.


Oops, my mistake, I was looking at the wrong file. Here it was moved down 
to maintain the order.
But in the TokenNameFinderTool it was changed.

Jörn

Re: svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/ main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/ main/java/opennlp/tools/cmdlin...

Posted by Jörn Kottmann <ko...@gmail.com>.
On 02/04/2014 06:10 PM, markg@apache.org wrote:
> Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
> URL:http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
> ==============================================================================
> --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java (original)
> +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java Tue Feb  4 17:10:11 2014
> @@ -14,7 +14,6 @@
>    * See the License for the specific language governing permissions and
>    * limitations under the License.
>    */

<SNIP>
>   
> -      ObjectStream<String> lineStream =
> -        new PlainTextByLineStream(new InputStreamReader(System.in));
> -
> -      PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
> -      perfMon.start();
> +      ObjectStream<String> lineStream = null;
> +      PerformanceMonitor perfMon = null;
>   
>         try {
> +        lineStream = new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
> +        perfMon = new PerformanceMonitor(System.err, "sent");
> +        perfMon.start();


What is the motivation for moving the start of the performance monitoring 
down? Before, it measured opening
the input stream as well; now it won't.

If you think it should not include opening the stream, I suggest we make 
this change in a separate Jira issue and handle it
identically in all places.

Jörn

Re: svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/ main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/ main/java/opennlp/tools/cmdlin...

Posted by Jörn Kottmann <ko...@gmail.com>.
On 02/04/2014 06:10 PM, markg@apache.org wrote:
> @@ -51,8 +55,13 @@ public class SentenceSampleStreamFactory
>       CmdLineUtil.checkInputFile("Data", params.getData());
>       FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
>   
> -    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
> -        params.getEncoding());
> +    ObjectStream<String> lineStream=null;
> +    try {
> +      lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
> +params.getEncoding());
> +    } catch (IOException ex) {
> +      Logger.getLogger(SentenceSampleStreamFactory.class.getName()).log(Level.SEVERE, null, ex);
> +    }
>   
>       return new SentenceSampleStream(lineStream);
>     }
>

<SNIP>

>   
>   /**
>    * Factory producing OpenNLP {@link TokenSampleStream}s.
> @@ -38,7 +41,7 @@ public class TokenSampleStreamFactory ex
>   
>     public static void registerFactory() {
>       StreamFactoryRegistry.registerFactory(TokenSample.class,
> -        StreamFactoryRegistry.DEFAULT_FORMAT, new TokenSampleStreamFactory(Parameters.class));
> +            StreamFactoryRegistry.DEFAULT_FORMAT, new TokenSampleStreamFactory(Parameters.class));
>     }
>   
>     protected <P> TokenSampleStreamFactory(Class<P> params) {
> @@ -51,8 +54,13 @@ public class TokenSampleStreamFactory ex
>       CmdLineUtil.checkInputFile("Data", params.getData());
>       FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
>   
> -    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
> -        params.getEncoding());
> +    ObjectStream<String> lineStream = null;
> +    try {
> +      lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
> +              params.getEncoding());
> +    } catch (IOException ex) {
> +      throw new RuntimeException(ex);
> +    }


We need to be careful here with the error handling.

It is always good to see how things worked before. In the previous 
version a TerminateToolException was thrown in case
the stream couldn't be opened. I suggest that we keep handling it 
that way. Anyway, whatever we decide, we should do
it consistently across the code base.

Jörn




Re: svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/ main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/ main/java/opennlp/tools/cmdlin...

Posted by Jörn Kottmann <ko...@gmail.com>.
On 02/04/2014 06:10 PM, markg@apache.org wrote:
>     public BioNLP2004NameSampleStream(InputStream in, int types) {
>       try {
> -      this.lineStream = new PlainTextByLineStream(in, "UTF-8");
> +      this.lineStream = new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8");
>         System.setOut(new PrintStream(System.out, true, "UTF-8"));
> -    } catch (UnsupportedEncodingException e) {
> +    } catch (IOException e) {
>         // UTF-8 is available on all JVMs, will never happen
>         throw new IllegalStateException(e);
>       }

All these streams need to be changed as well: the constructor taking the 
InputStream "in" has to be deprecated, and a new
constructor taking the InputStreamFactory needs to be added.

I suggest that we revert all these changes, and instead just add the new 
constructor.

Jörn

Re: svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/ main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/ main/java/opennlp/tools/cmdlin...

Posted by Jörn Kottmann <ko...@gmail.com>.
On 02/04/2014 06:10 PM, markg@apache.org wrote:
> +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java Tue Feb  4 17:10:11 2014

<SNIP>

>     void process() {
> -
> -    ObjectStream<String> untokenizedLineStream =
> -        new PlainTextByLineStream(new InputStreamReader(System.in));
> -
> -    ObjectStream<String> tokenizedLineStream = new WhitespaceTokenStream(
> -        new TokenizerStream(tokenizer, untokenizedLineStream));
> -
> -    PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
> -    perfMon.start();
> -
> +    ObjectStream<String> untokenizedLineStream = null;
> +
> +    ObjectStream<String> tokenizedLineStream = null;
> +    PerformanceMonitor perfMon = null;
>       try {
> +      untokenizedLineStream =
> +              new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");

The encoding should not be changed. To read from System.in, the platform 
default encoding should be used, not UTF-8.
As far as I know, hard-coding UTF-8 will not work on Windows.

Jörn