You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/12/01 21:08:58 UTC

svn commit: r1209220 [4/4] - in /incubator/opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/ main/java/opennlp/tools/cmdline/dictionary/ main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/...

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXSentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXSentenceSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXSentenceSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXSentenceSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -17,56 +17,39 @@
 
 package opennlp.tools.formats;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
 import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.cmdline.params.DetokenizerParameter;
 import opennlp.tools.postag.POSSample;
 import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.tokenize.DetokenizationDictionary;
-import opennlp.tools.tokenize.Detokenizer;
-import opennlp.tools.tokenize.DictionaryDetokenizer;
 import opennlp.tools.util.ObjectStream;
 
 /**
  * <b>Note:</b> Do not use this class, internal use only!
  */
-public class ConllXSentenceSampleStreamFactory implements ObjectStreamFactory<SentenceSample> {
+public class ConllXSentenceSampleStreamFactory extends
+    DetokenizerSampleStreamFactory<SentenceSample> {
 
   interface Parameters extends ConllXPOSSampleStreamFactory.Parameters, DetokenizerParameter {    
-    // TODO:
-    // Make chunk size configurable
+    // TODO: make chunk size configurable
   }
-  
-  public String getUsage() {
-    return ArgumentParser.createUsage(Parameters.class);
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(SentenceSample.class,
+        ConllXPOSSampleStreamFactory.CONLLX_FORMAT, new ConllXSentenceSampleStreamFactory(Parameters.class));
   }
 
-  public String validateArguments(String[] args) {
-    return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+  protected <P> ConllXSentenceSampleStreamFactory(Class<P> params) {
+    super(params);
   }
 
   public ObjectStream<SentenceSample> create(String[] args) {
-    
     Parameters params = ArgumentParser.parse(args, Parameters.class);
-    
-    // TODO: Compare code to ConllXTokenSampleStream, maybe it can be shared somehow
-    
-    ObjectStream<POSSample> posSampleStream = 
-        new ConllXPOSSampleStreamFactory().create(params);
-    
-    Detokenizer detokenizer;
-    try {
-      detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(new FileInputStream(new File(params.getDetokenizer()))));
-    } catch (IOException e) {
-      System.err.println("Error while loading detokenizer dict: " + e.getMessage());
-      throw new TerminateToolException(-1);
-    }
-    
-    return new POSToSentenceSampleStream(detokenizer, posSampleStream, 30);
+    language = params.getLang();
+
+    ObjectStream<POSSample> posSampleStream = StreamFactoryRegistry.getFactory(POSSample.class,
+        ConllXPOSSampleStreamFactory.CONLLX_FORMAT).create(
+        ArgumentParser.filter(args, ConllXPOSSampleStreamFactory.Parameters.class));
+    return new POSToSentenceSampleStream(createDetokenizer(params), posSampleStream, 30);
   }
 }

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXTokenSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXTokenSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXTokenSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXTokenSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -17,51 +17,37 @@
 
 package opennlp.tools.formats;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
 import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.cmdline.params.DetokenizerParameter;
 import opennlp.tools.postag.POSSample;
-import opennlp.tools.tokenize.DetokenizationDictionary;
-import opennlp.tools.tokenize.Detokenizer;
-import opennlp.tools.tokenize.DictionaryDetokenizer;
 import opennlp.tools.tokenize.TokenSample;
 import opennlp.tools.util.ObjectStream;
 
 /**
  * <b>Note:</b> Do not use this class, internal use only!
  */
-public class ConllXTokenSampleStreamFactory implements ObjectStreamFactory<TokenSample> {
-  
+public class ConllXTokenSampleStreamFactory extends DetokenizerSampleStreamFactory<TokenSample> {
+
   interface Parameters extends ConllXPOSSampleStreamFactory.Parameters, DetokenizerParameter {
   }
-  
-  public String getUsage() {
-    return ArgumentParser.createUsage(Parameters.class);
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(TokenSample.class,
+        ConllXPOSSampleStreamFactory.CONLLX_FORMAT, new ConllXTokenSampleStreamFactory(Parameters.class));
   }
 
-  public String validateArguments(String[] args) {
-    return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+  protected <P> ConllXTokenSampleStreamFactory(Class<P> params) {
+    super(params);
   }
 
   public ObjectStream<TokenSample> create(String[] args) {
-    
     Parameters params = ArgumentParser.parse(args, Parameters.class);
-    
-    ObjectStream<POSSample> samples = new ConllXPOSSampleStreamFactory().create(params);
-    
-    Detokenizer detokenizer;
-    try {
-      detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(new FileInputStream(new File(params.getDetokenizer()))));
-    } catch (IOException e) {
-      System.err.println("Error while loading detokenizer dict: " + e.getMessage());
-      throw new TerminateToolException(-1);
-    }
-    
-    return new POSToTokenSampleStream(detokenizer,samples);
+    language = params.getLang();
+
+    ObjectStream<POSSample> samples = StreamFactoryRegistry.getFactory(POSSample.class,
+        ConllXPOSSampleStreamFactory.CONLLX_FORMAT).create(
+        ArgumentParser.filter(args, ConllXPOSSampleStreamFactory.Parameters.class));
+    return new POSToTokenSampleStream(createDetokenizer(params), samples);
   }
 }

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.params.DetokenizerParameter;
+import opennlp.tools.tokenize.DetokenizationDictionary;
+import opennlp.tools.tokenize.Detokenizer;
+import opennlp.tools.tokenize.DictionaryDetokenizer;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+/**
+ * Base class for factories which need a detokenizer.
+ */
+public abstract class DetokenizerSampleStreamFactory<T> extends LanguageSampleStreamFactory<T> {
+
+  protected <P> DetokenizerSampleStreamFactory(Class<P> params) {
+    super(params);
+  }
+
+  protected Detokenizer createDetokenizer(DetokenizerParameter p) {
+    try {
+      return new DictionaryDetokenizer(new DetokenizationDictionary(
+          new FileInputStream(new File(p.getDetokenizer()))));
+    } catch (IOException e) {
+      throw new TerminateToolException(-1, "IO error while loading detokenizer dict: " + e.getMessage());
+    }
+  }
+}
\ No newline at end of file

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
+import opennlp.tools.doccat.DocumentSample;
+import opennlp.tools.doccat.DocumentSampleStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+import java.io.FileInputStream;
+
+/**
+ * Factory producing OpenNLP {@link DocumentSampleStream}s.
+ */
+public class DocumentSampleStreamFactory extends LanguageSampleStreamFactory<DocumentSample> {
+
+  interface Parameters extends LanguageFormatParams {
+  }
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(DocumentSample.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT, new DocumentSampleStreamFactory(Parameters.class));
+  }
+
+  protected <P> DocumentSampleStreamFactory(Class<P> params) {
+    super(params);
+  }
+
+  public ObjectStream<DocumentSample> create(String[] args) {
+    Parameters params = ArgumentParser.parse(args, Parameters.class);
+
+    language = params.getLang();
+
+    CmdLineUtil.checkInputFile("Data", params.getData());
+    FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
+
+    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
+        params.getEncoding());
+
+    return new DocumentSampleStream(lineStream);
+  }
+}

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LanguageSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LanguageSampleStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LanguageSampleStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LanguageSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+/**
+ * Stream factory for those streams which carry a language.
+ */
+public abstract class LanguageSampleStreamFactory<T> extends AbstractSampleStreamFactory<T> {
+
+  // language seems to belong to the stream, however, ObjectStream is used in 400+ places
+  // in the project and introducing new things to it is not a light decision.
+  protected String language;
+
+  protected <P> LanguageSampleStreamFactory(Class<P> params) {
+    super(params);
+  }
+
+  @Override
+  public String getLang() {
+    return language;
+  }
+}

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LanguageSampleStreamFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java Thu Dec  1 20:08:25 2011
@@ -20,8 +20,6 @@ package opennlp.tools.formats;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.PrintStream;
-import java.util.HashMap;
-import java.util.Map;
 
 import opennlp.tools.doccat.DocumentSample;
 import opennlp.tools.tokenize.SimpleTokenizer;
@@ -30,7 +28,7 @@ import opennlp.tools.util.PlainTextByLin
 
 /**
  * Stream filter to produce document samples out of a Leipzig sentences.txt file.
- * In the Leipzig corpus the encoding of the various senences.txt file is defined by
+ * In the Leipzig corpus the encoding of the various sentences.txt file is defined by
  * the language. The language must be specified to produce the category tags and is used
  * to determine the correct input encoding.
  * <p>
@@ -50,6 +48,7 @@ public class LeipzigDoccatSampleStream e
    * @param language the Leipzig input sentences.txt file
    * @param sentencesPerDocument the number of sentences which should be grouped into once {@link DocumentSample}
    * @param in the InputStream pointing to the contents of the sentences.txt input file
+   * @throws IOException IOException
    */
   LeipzigDoccatSampleStream(String language, int sentencesPerDocument, 
       InputStream in) throws IOException {

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -17,48 +17,43 @@
 
 package opennlp.tools.formats;
 
-import java.io.File;
 import java.io.IOException;
 
 import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
 import opennlp.tools.cmdline.CmdLineUtil;
-import opennlp.tools.cmdline.ObjectStreamFactory;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
 import opennlp.tools.doccat.DocumentSample;
 import opennlp.tools.util.ObjectStream;
 
 /**
  * <b>Note:</b> Do not use this class, internal use only!
  */
-public class LeipzigDocumentSampleStreamFactory implements ObjectStreamFactory<DocumentSample> {
+public class LeipzigDocumentSampleStreamFactory extends LanguageSampleStreamFactory<DocumentSample> {
 
-  interface Parameters {
-    @ParameterDescription(valueName = "languageCode")
-    String getLang();
-    
-    @ParameterDescription(valueName = "sampleData")
-    String getData();
+  interface Parameters extends LanguageFormatParams {
   }
-  
-  public String getUsage() {
-    return ArgumentParser.createUsage(Parameters.class);
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(DocumentSample.class,
+        "leipzig", new LeipzigDocumentSampleStreamFactory(Parameters.class));
   }
-  
-  public String validateArguments(String[] args) {
-    return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+
+  protected <P> LeipzigDocumentSampleStreamFactory(Class<P> params) {
+    super(params);
   }
-  
+
   public ObjectStream<DocumentSample> create(String[] args) {
     
     Parameters params = ArgumentParser.parse(args, Parameters.class);
+    language = params.getLang();
 
     try {
       return new LeipzigDoccatSampleStream(params.getLang(), 20,
-          CmdLineUtil.openInFile(new File(params.getData())));
+          CmdLineUtil.openInFile(params.getData()));
     } catch (IOException e) {
-      System.err.println("Cannot open sample data: " + e.getMessage());
-      throw new TerminateToolException(-1);
+      throw new TerminateToolException(-1, "IO error while opening sample data: " + e.getMessage());
     }
   }
 }

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import java.io.InputStreamReader;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
+import opennlp.tools.namefind.NameSample;
+import opennlp.tools.namefind.NameSampleDataStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+/**
+ * Factory producing OpenNLP {@link NameSampleDataStream}s.
+ */
+public class NameSampleDataStreamFactory extends LanguageSampleStreamFactory<NameSample> {
+
+  static interface Parameters extends LanguageFormatParams {
+  }
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(NameSample.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT, new NameSampleDataStreamFactory(Parameters.class));
+  }
+
+  protected <P> NameSampleDataStreamFactory(Class<P> params) {
+    super(params);
+  }
+
+  public ObjectStream<NameSample> create(String[] args) {
+    Parameters params = ArgumentParser.parse(args, Parameters.class);
+    language = params.getLang();
+
+    CmdLineUtil.checkInputFile("Data", params.getData());
+
+    ObjectStream<String> lineStream;
+    lineStream = new PlainTextByLineStream(new InputStreamReader(
+        CmdLineUtil.openInFile(params.getData()), params.getEncoding()));
+
+    return new NameSampleDataStream(lineStream);
+  }
+}

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToSentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToSentenceSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToSentenceSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToSentenceSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -17,55 +17,37 @@
 
 package opennlp.tools.formats;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
 import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.cmdline.params.DetokenizerParameter;
 import opennlp.tools.namefind.NameSample;
 import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.tokenize.DetokenizationDictionary;
-import opennlp.tools.tokenize.Detokenizer;
-import opennlp.tools.tokenize.DictionaryDetokenizer;
 import opennlp.tools.util.ObjectStream;
 
 /**
  * <b>Note:</b> Do not use this class, internal use only!
  */
-public class NameToSentenceSampleStreamFactory implements
-    ObjectStreamFactory<SentenceSample> {
+public class NameToSentenceSampleStreamFactory extends DetokenizerSampleStreamFactory<SentenceSample> {
 
-  interface Parameters extends NameSampleStreamFactory.Parameters, DetokenizerParameter {
+  interface Parameters extends NameSampleDataStreamFactory.Parameters, DetokenizerParameter {
   }
-  
-  public String getUsage() {
-    return ArgumentParser.createUsage(Parameters.class);
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(SentenceSample.class,
+        "namefinder", new NameToSentenceSampleStreamFactory(Parameters.class));
   }
 
-  public String validateArguments(String[] args) {
-    return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+  protected <P> NameToSentenceSampleStreamFactory(Class<P> params) {
+    super(params);
   }
 
   public ObjectStream<SentenceSample> create(String[] args) {
     Parameters params = ArgumentParser.parse(args, Parameters.class);
+    language = params.getLang();
 
-    ObjectStream<NameSample> nameSampleStream = new NameSampleStreamFactory()
-        .create(params);
-
-    // TODO: Move this to a factory method
-    Detokenizer detokenizer;
-    try {
-      detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(
-          new FileInputStream(new File(params.getDetokenizer()))));
-    } catch (IOException e) {
-      System.err.println("Error while loading detokenizer dict: "
-          + e.getMessage());
-      throw new TerminateToolException(-1);
-    }
-
-    return new NameToSentenceSampleStream(detokenizer, nameSampleStream, 30);
+    ObjectStream<NameSample> nameSampleStream = StreamFactoryRegistry.getFactory(
+        NameSample.class, StreamFactoryRegistry.DEFAULT_FORMAT).create(
+        ArgumentParser.filter(args, NameSampleDataStreamFactory.Parameters.class));
+    return new NameToSentenceSampleStream(createDetokenizer(params), nameSampleStream, 30);
   }
 }

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToTokenSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToTokenSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToTokenSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToTokenSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -17,54 +17,37 @@
 
 package opennlp.tools.formats;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
 import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.cmdline.params.DetokenizerParameter;
 import opennlp.tools.namefind.NameSample;
-import opennlp.tools.tokenize.DetokenizationDictionary;
-import opennlp.tools.tokenize.Detokenizer;
-import opennlp.tools.tokenize.DictionaryDetokenizer;
 import opennlp.tools.tokenize.TokenSample;
 import opennlp.tools.util.ObjectStream;
 
 /**
  * <b>Note:</b> Do not use this class, internal use only!
  */
-public class NameToTokenSampleStreamFactory implements ObjectStreamFactory<TokenSample> {
+public class NameToTokenSampleStreamFactory extends DetokenizerSampleStreamFactory<TokenSample> {
 
-  interface Parameters extends NameSampleStreamFactory.Parameters, DetokenizerParameter {
+  interface Parameters extends NameSampleDataStreamFactory.Parameters, DetokenizerParameter {
   }
-  
-  public String getUsage() {
-    return ArgumentParser.createUsage(Parameters.class);
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(TokenSample.class,
+        "namefinder", new NameToTokenSampleStreamFactory(Parameters.class));
   }
 
-  public String validateArguments(String[] args) {
-    return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+  protected <P> NameToTokenSampleStreamFactory(Class<P> params) {
+    super(params);
   }
 
   public ObjectStream<TokenSample> create(String[] args) {
     Parameters params = ArgumentParser.parse(args, Parameters.class);
+    language = params.getLang();
 
-    ObjectStream<NameSample> nameSampleStream = new NameSampleStreamFactory()
-        .create(params);
-
-    // TODO: Move this to a factory method
-    Detokenizer detokenizer;
-    try {
-      detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(
-          new FileInputStream(new File(params.getDetokenizer()))));
-    } catch (IOException e) {
-      System.err.println("Error while loading detokenizer dict: "
-          + e.getMessage());
-      throw new TerminateToolException(-1);
-    }
-
-    return new NameToTokenSampleStream(detokenizer, nameSampleStream);
+    ObjectStream<NameSample> nameSampleStream = StreamFactoryRegistry.getFactory(
+        NameSample.class, StreamFactoryRegistry.DEFAULT_FORMAT).create(
+        ArgumentParser.filter(args, NameSampleDataStreamFactory.Parameters.class));
+    return new NameToTokenSampleStream(createDetokenizer(params), nameSampleStream);
   }
 }

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToSentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToSentenceSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToSentenceSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToSentenceSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -17,54 +17,37 @@
 
 package opennlp.tools.formats;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
 import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.cmdline.params.DetokenizerParameter;
 import opennlp.tools.postag.POSSample;
 import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.tokenize.DetokenizationDictionary;
-import opennlp.tools.tokenize.Detokenizer;
-import opennlp.tools.tokenize.DictionaryDetokenizer;
 import opennlp.tools.util.ObjectStream;
 
 /**
  * <b>Note:</b> Do not use this class, internal use only!
  */
-public class POSToSentenceSampleStreamFactory implements
-    ObjectStreamFactory<SentenceSample> {
+public class POSToSentenceSampleStreamFactory extends DetokenizerSampleStreamFactory<SentenceSample> {
 
   interface Parameters extends WordTagSampleStreamFactory.Parameters, DetokenizerParameter {
   }
 
-  public String getUsage() {
-    return ArgumentParser.createUsage(Parameters.class);
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(SentenceSample.class,
+        "pos", new POSToSentenceSampleStreamFactory(Parameters.class));
   }
 
-  public String validateArguments(String[] args) {
-    return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+  protected <P> POSToSentenceSampleStreamFactory(Class<P> params) {
+    super(params);
   }
 
   public ObjectStream<SentenceSample> create(String[] args) {
     Parameters params = ArgumentParser.parse(args, Parameters.class);
+    language = params.getLang();
 
-    ObjectStream<POSSample> posSampleStream = new WordTagSampleStreamFactory()
-        .create(params);
-
-    Detokenizer detokenizer;
-    try {
-      detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(
-          new FileInputStream(new File(params.getDetokenizer()))));
-    } catch (IOException e) {
-      System.err.println("Error while loading detokenizer dict: "
-          + e.getMessage());
-      throw new TerminateToolException(-1);
-    }
-
-    return new POSToSentenceSampleStream(detokenizer, posSampleStream, 30);
+    ObjectStream<POSSample> posSampleStream = StreamFactoryRegistry.getFactory(POSSample.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT).create(
+        ArgumentParser.filter(args, WordTagSampleStreamFactory.Parameters.class));
+    return new POSToSentenceSampleStream(createDetokenizer(params), posSampleStream, 30);
   }
 }

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToTokenSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToTokenSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToTokenSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToTokenSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -17,54 +17,37 @@
 
 package opennlp.tools.formats;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
 import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.cmdline.params.DetokenizerParameter;
 import opennlp.tools.postag.POSSample;
-import opennlp.tools.tokenize.DetokenizationDictionary;
-import opennlp.tools.tokenize.Detokenizer;
-import opennlp.tools.tokenize.DictionaryDetokenizer;
 import opennlp.tools.tokenize.TokenSample;
 import opennlp.tools.util.ObjectStream;
 
 /**
  * <b>Note:</b> Do not use this class, internal use only!
  */
-public class POSToTokenSampleStreamFactory implements ObjectStreamFactory<TokenSample> {
+public class POSToTokenSampleStreamFactory extends DetokenizerSampleStreamFactory<TokenSample> {
 
   interface Parameters extends WordTagSampleStreamFactory.Parameters, DetokenizerParameter {
   }
-  
-  public String getUsage() {
-    return ArgumentParser.createUsage(Parameters.class);
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(TokenSample.class,
+        "pos", new POSToTokenSampleStreamFactory(Parameters.class));
   }
 
-  public String validateArguments(String[] args) {
-    return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+  protected <P> POSToTokenSampleStreamFactory(Class<P> params) {
+    super(params);
   }
 
   public ObjectStream<TokenSample> create(String[] args) {
     Parameters params = ArgumentParser.parse(args, Parameters.class);
+    language = params.getLang();
 
-    ObjectStream<POSSample> posSampleStream = new WordTagSampleStreamFactory()
-        .create(params);
-
-    // TODO: Move this to a factory method
-    Detokenizer detokenizer;
-    try {
-      detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(
-          new FileInputStream(new File(params.getDetokenizer()))));
-    } catch (IOException e) {
-      System.err.println("Error while loading detokenizer dict: "
-          + e.getMessage());
-      throw new TerminateToolException(-1);
-    }
-
-    return new POSToTokenSampleStream(detokenizer, posSampleStream);
+    ObjectStream<POSSample> posSampleStream = StreamFactoryRegistry.getFactory(POSSample.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT).create(
+        ArgumentParser.filter(args, WordTagSampleStreamFactory.Parameters.class));
+    return new POSToTokenSampleStream(createDetokenizer(params), posSampleStream);
   }
 }

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
+import opennlp.tools.parser.Parse;
+import opennlp.tools.parser.ParseSampleStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+import java.io.FileInputStream;
+
+/**
+ * Factory producing OpenNLP {@link ParseSampleStream}s.
+ */
+public class ParseSampleStreamFactory extends LanguageSampleStreamFactory<Parse> {
+
+  interface Parameters extends LanguageFormatParams {
+  }
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(Parse.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT, new ParseSampleStreamFactory(Parameters.class));
+  }
+
+  protected <P> ParseSampleStreamFactory(Class<P> params) {
+    super(params);
+  }
+
+  public ObjectStream<Parse> create(String[] args) {
+    Parameters params = ArgumentParser.parse(args, Parameters.class);
+
+    language = params.getLang();
+
+    CmdLineUtil.checkInputFile("Data", params.getData());
+    FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
+
+    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn
+        .getChannel(), params.getEncoding());
+
+    return new ParseSampleStream(lineStream);
+  }
+}
\ No newline at end of file

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
+import opennlp.tools.sentdetect.SentenceSample;
+import opennlp.tools.sentdetect.SentenceSampleStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+import java.io.FileInputStream;
+
+/**
+ * Factory producing OpenNLP {@link SentenceSampleStream}s.
+ */
+public class SentenceSampleStreamFactory extends LanguageSampleStreamFactory<SentenceSample> {
+
+  interface Parameters extends LanguageFormatParams {
+  }
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(SentenceSample.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT, new SentenceSampleStreamFactory(Parameters.class));
+  }
+
+  protected <P> SentenceSampleStreamFactory(Class<P> params) {
+    super(params);
+  }
+
+  public ObjectStream<SentenceSample> create(String[] args) {
+    Parameters params = ArgumentParser.parse(args, Parameters.class);
+
+    language = params.getLang();
+
+    CmdLineUtil.checkInputFile("Data", params.getData());
+    FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
+
+    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
+        params.getEncoding());
+
+    return new SentenceSampleStream(lineStream);
+  }
+}
\ No newline at end of file

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
+import opennlp.tools.tokenize.TokenSample;
+import opennlp.tools.tokenize.TokenSampleStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+import java.io.FileInputStream;
+
+/**
+ * Factory producing OpenNLP {@link TokenSampleStream}s.
+ */
+public class TokenSampleStreamFactory extends LanguageSampleStreamFactory<TokenSample> {
+
+  interface Parameters extends LanguageFormatParams {
+  }
+
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(TokenSample.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT, new TokenSampleStreamFactory(Parameters.class));
+  }
+
+  protected <P> TokenSampleStreamFactory(Class<P> params) {
+    super(params);
+  }
+
+  public ObjectStream<TokenSample> create(String[] args) {
+    Parameters params = ArgumentParser.parse(args, Parameters.class);
+
+    language = params.getLang();
+
+    CmdLineUtil.checkInputFile("Data", params.getData());
+    FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
+
+    ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
+        params.getEncoding());
+
+    return new TokenSampleStream(lineStream);
+  }
+}
\ No newline at end of file

Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -17,15 +17,12 @@
 
 package opennlp.tools.formats;
 
-import java.io.File;
 import java.io.InputStreamReader;
-import java.io.UnsupportedEncodingException;
 
 import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
 import opennlp.tools.cmdline.CmdLineUtil;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
 import opennlp.tools.postag.POSSample;
 import opennlp.tools.postag.WordTagSampleStream;
 import opennlp.tools.util.ObjectStream;
@@ -34,40 +31,29 @@ import opennlp.tools.util.PlainTextByLin
 /**
  * <b>Note:</b> Do not use this class, internal use only!
  */
-public class WordTagSampleStreamFactory implements ObjectStreamFactory<POSSample> {
+public class WordTagSampleStreamFactory extends LanguageSampleStreamFactory<POSSample> {
 
-  static interface Parameters {
-    
-    @ParameterDescription(valueName = "sampleData")
-    String getData();
-    
-    @ParameterDescription(valueName = "charsetName")
-    String getEncoding();
-  }
-  
-  public String getUsage() {
-    return ArgumentParser.createUsage(Parameters.class);
+  static interface Parameters extends LanguageFormatParams {
   }
 
-  public String validateArguments(String[] args) {
-    return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
-  }
-
-  ObjectStream<POSSample> create(Parameters params) {
-    ObjectStream<String> lineStream;
-    try {
-      lineStream = new PlainTextByLineStream(new InputStreamReader(
-          CmdLineUtil.openInFile(new File(params.getData())), params.getEncoding()));
-      
-      return new WordTagSampleStream(lineStream);
-    } catch (UnsupportedEncodingException e) {
-      System.err.println("Encoding not supported: " + params.getEncoding());
-      throw new TerminateToolException(-1);
-    }
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(POSSample.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT, new WordTagSampleStreamFactory(Parameters.class));
   }
   
+  protected <P> WordTagSampleStreamFactory(Class<P> params) {
+    super(params);
+  }
+
   public ObjectStream<POSSample> create(String[] args) {
     Parameters params = ArgumentParser.parse(args, Parameters.class);
-    return create(params);
+    language = params.getLang();
+
+    CmdLineUtil.checkInputFile("Data", params.getData());
+    ObjectStream<String> lineStream;
+    lineStream = new PlainTextByLineStream(new InputStreamReader(
+        CmdLineUtil.openInFile(params.getData()), params.getEncoding()));
+
+    return new WordTagSampleStream(lineStream);
   }
-}
+}
\ No newline at end of file

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -25,8 +25,8 @@ import opennlp.tools.cmdline.ArgumentPar
 import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
 import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
 import opennlp.tools.cmdline.CmdLineUtil;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.formats.LanguageSampleStreamFactory;
 import opennlp.tools.util.ObjectStream;
 
 /**
@@ -35,16 +35,21 @@ import opennlp.tools.util.ObjectStream;
  * <p>
  * <b>Note:</b> Do not use this class, internal use only!
  */
-public class ADChunkSampleStreamFactory implements
-    ObjectStreamFactory<ChunkSample> {
+public class ADChunkSampleStreamFactory extends LanguageSampleStreamFactory<ChunkSample> {
 
   interface Parameters {
-    @ParameterDescription(valueName = "encoding")
+    // All parameters have to be repeated here because encoding is not optional,
+    // as implied by the (now removed) "if (encoding == null)" check in create().
+    @ParameterDescription(valueName = "charsetName",
+        description = "encoding for reading and writing text, if absent the system default is used.")
     Charset getEncoding();
 
-    @ParameterDescription(valueName = "sampleData")
-    String getData();
-    
+    @ParameterDescription(valueName = "sampleData", description = "data to be used, usually a file name.")
+    File getData();
+
+    @ParameterDescription(valueName = "language", description = "language which is being processed.")
+    String getLang();
+
     @ParameterDescription(valueName = "start", description = "index of first sentence")
     @OptionalParameter
     Integer getStart();
@@ -54,26 +59,25 @@ public class ADChunkSampleStreamFactory 
     Integer getEnd();
   }
 
-  public String getUsage() {
-    return ArgumentParser.createUsage(Parameters.class);
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(ChunkSample.class,
+        "ad", new ADChunkSampleStreamFactory(Parameters.class));
   }
 
-  public String validateArguments(String[] args) {
-    return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+  protected <P> ADChunkSampleStreamFactory(Class<P> params) {
+    super(params);
   }
 
   public ObjectStream<ChunkSample> create(String[] args) {
 
     Parameters params = ArgumentParser.parse(args, Parameters.class);
 
+    language = params.getLang();
+
     Charset encoding = params.getEncoding();
 
-    if (encoding == null) {
-      throw new TerminateToolException(1);
-    }
-    
-    ADChunkSampleStream sampleStream = new ADChunkSampleStream(CmdLineUtil.openInFile(new File(params
-        .getData())), encoding.name());
+    ADChunkSampleStream sampleStream =
+        new ADChunkSampleStream(CmdLineUtil.openInFile(params.getData()), encoding.name());
 
     if(params.getStart() != null && params.getStart() > -1) {
       sampleStream.setStart(params.getStart());

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java Thu Dec  1 20:08:25 2011
@@ -23,47 +23,50 @@ import java.nio.charset.Charset;
 import opennlp.tools.cmdline.ArgumentParser;
 import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
 import opennlp.tools.cmdline.CmdLineUtil;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.formats.LanguageSampleStreamFactory;
 import opennlp.tools.namefind.NameSample;
 import opennlp.tools.util.ObjectStream;
 
 /**
- * A Factory to create a Arvores Deitadas NameSampleStream from the command line
+ * A Factory to create an Arvores Deitadas NameSampleStream from the command line
  * utility.
  * <p>
  * <b>Note:</b> Do not use this class, internal use only!
  */
-public class ADNameSampleStreamFactory implements
-    ObjectStreamFactory<NameSample> {
+public class ADNameSampleStreamFactory extends LanguageSampleStreamFactory<NameSample> {
 
   interface Parameters {
-    @ParameterDescription(valueName = "encoding")
+    // All parameters have to be repeated here because encoding is not optional,
+    // as implied by the (now removed) "if (encoding == null)" check in create().
+    @ParameterDescription(valueName = "charsetName",
+        description = "encoding for reading and writing text, if absent the system default is used.")
     Charset getEncoding();
 
-    @ParameterDescription(valueName = "sampleData")
-    String getData();
+    @ParameterDescription(valueName = "sampleData", description = "data to be used, usually a file name.")
+    File getData();
+
+    @ParameterDescription(valueName = "language", description = "language which is being processed.")
+    String getLang();
   }
 
-  public String getUsage() {
-    return ArgumentParser.createUsage(Parameters.class);
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(NameSample.class,
+        "ad", new ADNameSampleStreamFactory(Parameters.class));
   }
 
-  public String validateArguments(String[] args) {
-    return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+  protected <P> ADNameSampleStreamFactory(Class<P> params) {
+    super(params);
   }
 
   public ObjectStream<NameSample> create(String[] args) {
 
     Parameters params = ArgumentParser.parse(args, Parameters.class);
 
-    Charset encoding = params.getEncoding();
+    language = params.getLang();
 
-    if (encoding == null) {
-      throw new TerminateToolException(1);
-    }
+    Charset encoding = params.getEncoding();
 
-    return new ADNameSampleStream(CmdLineUtil.openInFile(new File(params
-        .getData())), encoding.name());
+    return new ADNameSampleStream(CmdLineUtil.openInFile(params.getData()), encoding.name());
   }
 }

Modified: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/cmdline/CLITest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/cmdline/CLITest.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/cmdline/CLITest.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/cmdline/CLITest.java Thu Dec  1 20:08:25 2011
@@ -76,15 +76,45 @@ public class CLITest {
     try {
       CLI.main(new String[]{});
     } catch (ExitException e) {
+      assertEquals(0, e.status());
+    }
+  }
+
+  /**
+   * Ensure the main method prints an error and exits with status 1 for an unknown tool name.
+   */
+  @Test
+  public void testUnknownToolMessage() {
+    try {
+      CLI.main(new String[]{"unknown name"});
+    } catch (ExitException e) {
       assertEquals(1, e.status());
     }
-    
+  }
+
+  /**
+   * Ensure the tool checks the parameter and returns 1.
+   */
+  @Test
+  public void testToolParameterMessage() {
     try {
-      CLI.main(new String[]{"unkown name"});
+      CLI.main(new String[]{"DoccatTrainer", "-param", "value"});
     } catch (ExitException e) {
       assertEquals(1, e.status());
     }
   }
+
+  /**
+   * Ensure the main method prints an error and exits with status -1 for an unknown model file.
+   */
+  @Test
+  public void testUnknownFileMessage() {
+    try {
+      CLI.main(new String[]{"Doccat", "unknown.model"});
+    } catch (ExitException e) {
+      assertEquals(-1, e.status());
+    }
+  }
   
   
   /**
@@ -97,7 +127,7 @@ public class CLITest {
       try {
         CLI.main(new String[]{toolName, "help"});
       } catch (ExitException e) {
-        assertEquals(1, e.status());
+        assertEquals(0, e.status());
       }
     }
   }