You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/12/01 21:08:58 UTC
svn commit: r1209220 [4/4] - in /incubator/opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/
main/java/opennlp/tools/cmdline/dictionary/
main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/...
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXSentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXSentenceSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXSentenceSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXSentenceSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -17,56 +17,39 @@
package opennlp.tools.formats;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.params.DetokenizerParameter;
import opennlp.tools.postag.POSSample;
import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.tokenize.DetokenizationDictionary;
-import opennlp.tools.tokenize.Detokenizer;
-import opennlp.tools.tokenize.DictionaryDetokenizer;
import opennlp.tools.util.ObjectStream;
/**
* <b>Note:</b> Do not use this class, internal use only!
*/
-public class ConllXSentenceSampleStreamFactory implements ObjectStreamFactory<SentenceSample> {
+public class ConllXSentenceSampleStreamFactory extends
+ DetokenizerSampleStreamFactory<SentenceSample> {
interface Parameters extends ConllXPOSSampleStreamFactory.Parameters, DetokenizerParameter {
- // TODO:
- // Make chunk size configurable
+ // TODO: make chunk size configurable
}
-
- public String getUsage() {
- return ArgumentParser.createUsage(Parameters.class);
+
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(SentenceSample.class,
+ ConllXPOSSampleStreamFactory.CONLLX_FORMAT, new ConllXSentenceSampleStreamFactory(Parameters.class));
}
- public String validateArguments(String[] args) {
- return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+ protected <P> ConllXSentenceSampleStreamFactory(Class<P> params) {
+ super(params);
}
public ObjectStream<SentenceSample> create(String[] args) {
-
Parameters params = ArgumentParser.parse(args, Parameters.class);
-
- // TODO: Compare code to ConllXTokenSampleStream, maybe it can be shared somehow
-
- ObjectStream<POSSample> posSampleStream =
- new ConllXPOSSampleStreamFactory().create(params);
-
- Detokenizer detokenizer;
- try {
- detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(new FileInputStream(new File(params.getDetokenizer()))));
- } catch (IOException e) {
- System.err.println("Error while loading detokenizer dict: " + e.getMessage());
- throw new TerminateToolException(-1);
- }
-
- return new POSToSentenceSampleStream(detokenizer, posSampleStream, 30);
+ language = params.getLang();
+
+ ObjectStream<POSSample> posSampleStream = StreamFactoryRegistry.getFactory(POSSample.class,
+ ConllXPOSSampleStreamFactory.CONLLX_FORMAT).create(
+ ArgumentParser.filter(args, ConllXPOSSampleStreamFactory.Parameters.class));
+ return new POSToSentenceSampleStream(createDetokenizer(params), posSampleStream, 30);
}
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXTokenSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXTokenSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXTokenSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ConllXTokenSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -17,51 +17,37 @@
package opennlp.tools.formats;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.params.DetokenizerParameter;
import opennlp.tools.postag.POSSample;
-import opennlp.tools.tokenize.DetokenizationDictionary;
-import opennlp.tools.tokenize.Detokenizer;
-import opennlp.tools.tokenize.DictionaryDetokenizer;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.util.ObjectStream;
/**
* <b>Note:</b> Do not use this class, internal use only!
*/
-public class ConllXTokenSampleStreamFactory implements ObjectStreamFactory<TokenSample> {
-
+public class ConllXTokenSampleStreamFactory extends DetokenizerSampleStreamFactory<TokenSample> {
+
interface Parameters extends ConllXPOSSampleStreamFactory.Parameters, DetokenizerParameter {
}
-
- public String getUsage() {
- return ArgumentParser.createUsage(Parameters.class);
+
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(TokenSample.class,
+ ConllXPOSSampleStreamFactory.CONLLX_FORMAT, new ConllXTokenSampleStreamFactory(Parameters.class));
}
- public String validateArguments(String[] args) {
- return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+ protected <P> ConllXTokenSampleStreamFactory(Class<P> params) {
+ super(params);
}
public ObjectStream<TokenSample> create(String[] args) {
-
Parameters params = ArgumentParser.parse(args, Parameters.class);
-
- ObjectStream<POSSample> samples = new ConllXPOSSampleStreamFactory().create(params);
-
- Detokenizer detokenizer;
- try {
- detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(new FileInputStream(new File(params.getDetokenizer()))));
- } catch (IOException e) {
- System.err.println("Error while loading detokenizer dict: " + e.getMessage());
- throw new TerminateToolException(-1);
- }
-
- return new POSToTokenSampleStream(detokenizer,samples);
+ language = params.getLang();
+
+ ObjectStream<POSSample> samples = StreamFactoryRegistry.getFactory(POSSample.class,
+ ConllXPOSSampleStreamFactory.CONLLX_FORMAT).create(
+ ArgumentParser.filter(args, ConllXPOSSampleStreamFactory.Parameters.class));
+ return new POSToTokenSampleStream(createDetokenizer(params), samples);
}
}
Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.params.DetokenizerParameter;
+import opennlp.tools.tokenize.DetokenizationDictionary;
+import opennlp.tools.tokenize.Detokenizer;
+import opennlp.tools.tokenize.DictionaryDetokenizer;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+/**
+ * Base class for factories which need detokenizer.
+ */
+public abstract class DetokenizerSampleStreamFactory<T> extends LanguageSampleStreamFactory<T> {
+
+ protected <P> DetokenizerSampleStreamFactory(Class<P> params) {
+ super(params);
+ }
+
+ protected Detokenizer createDetokenizer(DetokenizerParameter p) {
+ try {
+ return new DictionaryDetokenizer(new DetokenizationDictionary(
+ new FileInputStream(new File(p.getDetokenizer()))));
+ } catch (IOException e) {
+ throw new TerminateToolException(-1, "IO error while loading detokenizer dict: " + e.getMessage());
+ }
+ }
+}
\ No newline at end of file
Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DetokenizerSampleStreamFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
+import opennlp.tools.doccat.DocumentSample;
+import opennlp.tools.doccat.DocumentSampleStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+import java.io.FileInputStream;
+
+/**
+ * Factory producing OpenNLP {@link DocumentSampleStream}s.
+ */
+public class DocumentSampleStreamFactory extends LanguageSampleStreamFactory<DocumentSample> {
+
+ interface Parameters extends LanguageFormatParams {
+ }
+
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(DocumentSample.class,
+ StreamFactoryRegistry.DEFAULT_FORMAT, new DocumentSampleStreamFactory(Parameters.class));
+ }
+
+ protected <P> DocumentSampleStreamFactory(Class<P> params) {
+ super(params);
+ }
+
+ public ObjectStream<DocumentSample> create(String[] args) {
+ Parameters params = ArgumentParser.parse(args, Parameters.class);
+
+ language = params.getLang();
+
+ CmdLineUtil.checkInputFile("Data", params.getData());
+ FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
+
+ ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
+ params.getEncoding());
+
+ return new DocumentSampleStream(lineStream);
+ }
+}
Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LanguageSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LanguageSampleStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LanguageSampleStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LanguageSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+/**
+ * Stream factory for those streams which carry language.
+ */
+public abstract class LanguageSampleStreamFactory<T> extends AbstractSampleStreamFactory<T> {
+
+ // language seems to belong to the stream, however, ObjectStream is used in 400+ places
+ // in the project and introducing new things to it is not a light decision.
+ protected String language;
+
+ protected <P> LanguageSampleStreamFactory(Class<P> params) {
+ super(params);
+ }
+
+ @Override
+ public String getLang() {
+ return language;
+ }
+}
Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LanguageSampleStreamFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java Thu Dec 1 20:08:25 2011
@@ -20,8 +20,6 @@ package opennlp.tools.formats;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
-import java.util.HashMap;
-import java.util.Map;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.tokenize.SimpleTokenizer;
@@ -30,7 +28,7 @@ import opennlp.tools.util.PlainTextByLin
/**
* Stream filter to produce document samples out of a Leipzig sentences.txt file.
- * In the Leipzig corpus the encoding of the various senences.txt file is defined by
+ * In the Leipzig corpus the encoding of the various sentences.txt file is defined by
* the language. The language must be specified to produce the category tags and is used
* to determine the correct input encoding.
* <p>
@@ -50,6 +48,7 @@ public class LeipzigDoccatSampleStream e
* @param language the Leipzig input sentences.txt file
* @param sentencesPerDocument the number of sentences which should be grouped into once {@link DocumentSample}
* @param in the InputStream pointing to the contents of the sentences.txt input file
+ * @throws IOException IOException
*/
LeipzigDoccatSampleStream(String language, int sentencesPerDocument,
InputStream in) throws IOException {
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDocumentSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -17,48 +17,43 @@
package opennlp.tools.formats;
-import java.io.File;
import java.io.IOException;
import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
import opennlp.tools.cmdline.CmdLineUtil;
-import opennlp.tools.cmdline.ObjectStreamFactory;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.util.ObjectStream;
/**
* <b>Note:</b> Do not use this class, internal use only!
*/
-public class LeipzigDocumentSampleStreamFactory implements ObjectStreamFactory<DocumentSample> {
+public class LeipzigDocumentSampleStreamFactory extends LanguageSampleStreamFactory<DocumentSample> {
- interface Parameters {
- @ParameterDescription(valueName = "languageCode")
- String getLang();
-
- @ParameterDescription(valueName = "sampleData")
- String getData();
+ interface Parameters extends LanguageFormatParams {
}
-
- public String getUsage() {
- return ArgumentParser.createUsage(Parameters.class);
+
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(DocumentSample.class,
+ "leipzig", new LeipzigDocumentSampleStreamFactory(Parameters.class));
}
-
- public String validateArguments(String[] args) {
- return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+
+ protected <P> LeipzigDocumentSampleStreamFactory(Class<P> params) {
+ super(params);
}
-
+
public ObjectStream<DocumentSample> create(String[] args) {
Parameters params = ArgumentParser.parse(args, Parameters.class);
+ language = params.getLang();
try {
return new LeipzigDoccatSampleStream(params.getLang(), 20,
- CmdLineUtil.openInFile(new File(params.getData())));
+ CmdLineUtil.openInFile(params.getData()));
} catch (IOException e) {
- System.err.println("Cannot open sample data: " + e.getMessage());
- throw new TerminateToolException(-1);
+ throw new TerminateToolException(-1, "IO error while opening sample data: " + e.getMessage());
}
}
}
Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import java.io.InputStreamReader;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
+import opennlp.tools.namefind.NameSample;
+import opennlp.tools.namefind.NameSampleDataStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+/**
+ * Factory producing OpenNLP {@link NameSampleDataStream}s.
+ */
+public class NameSampleDataStreamFactory extends LanguageSampleStreamFactory<NameSample> {
+
+ static interface Parameters extends LanguageFormatParams {
+ }
+
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(NameSample.class,
+ StreamFactoryRegistry.DEFAULT_FORMAT, new NameSampleDataStreamFactory(Parameters.class));
+ }
+
+ protected <P> NameSampleDataStreamFactory(Class<P> params) {
+ super(params);
+ }
+
+ public ObjectStream<NameSample> create(String[] args) {
+ Parameters params = ArgumentParser.parse(args, Parameters.class);
+ language = params.getLang();
+
+ CmdLineUtil.checkInputFile("Data", params.getData());
+
+ ObjectStream<String> lineStream;
+ lineStream = new PlainTextByLineStream(new InputStreamReader(
+ CmdLineUtil.openInFile(params.getData()), params.getEncoding()));
+
+ return new NameSampleDataStream(lineStream);
+ }
+}
Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToSentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToSentenceSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToSentenceSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToSentenceSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -17,55 +17,37 @@
package opennlp.tools.formats;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.params.DetokenizerParameter;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.tokenize.DetokenizationDictionary;
-import opennlp.tools.tokenize.Detokenizer;
-import opennlp.tools.tokenize.DictionaryDetokenizer;
import opennlp.tools.util.ObjectStream;
/**
* <b>Note:</b> Do not use this class, internal use only!
*/
-public class NameToSentenceSampleStreamFactory implements
- ObjectStreamFactory<SentenceSample> {
+public class NameToSentenceSampleStreamFactory extends DetokenizerSampleStreamFactory<SentenceSample> {
- interface Parameters extends NameSampleStreamFactory.Parameters, DetokenizerParameter {
+ interface Parameters extends NameSampleDataStreamFactory.Parameters, DetokenizerParameter {
}
-
- public String getUsage() {
- return ArgumentParser.createUsage(Parameters.class);
+
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(SentenceSample.class,
+ "namefinder", new NameToSentenceSampleStreamFactory(Parameters.class));
}
- public String validateArguments(String[] args) {
- return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+ protected <P> NameToSentenceSampleStreamFactory(Class<P> params) {
+ super(params);
}
public ObjectStream<SentenceSample> create(String[] args) {
Parameters params = ArgumentParser.parse(args, Parameters.class);
+ language = params.getLang();
- ObjectStream<NameSample> nameSampleStream = new NameSampleStreamFactory()
- .create(params);
-
- // TODO: Move this to a factory method
- Detokenizer detokenizer;
- try {
- detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(
- new FileInputStream(new File(params.getDetokenizer()))));
- } catch (IOException e) {
- System.err.println("Error while loading detokenizer dict: "
- + e.getMessage());
- throw new TerminateToolException(-1);
- }
-
- return new NameToSentenceSampleStream(detokenizer, nameSampleStream, 30);
+ ObjectStream<NameSample> nameSampleStream = StreamFactoryRegistry.getFactory(
+ NameSample.class, StreamFactoryRegistry.DEFAULT_FORMAT).create(
+ ArgumentParser.filter(args, NameSampleDataStreamFactory.Parameters.class));
+ return new NameToSentenceSampleStream(createDetokenizer(params), nameSampleStream, 30);
}
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToTokenSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToTokenSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToTokenSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameToTokenSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -17,54 +17,37 @@
package opennlp.tools.formats;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.params.DetokenizerParameter;
import opennlp.tools.namefind.NameSample;
-import opennlp.tools.tokenize.DetokenizationDictionary;
-import opennlp.tools.tokenize.Detokenizer;
-import opennlp.tools.tokenize.DictionaryDetokenizer;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.util.ObjectStream;
/**
* <b>Note:</b> Do not use this class, internal use only!
*/
-public class NameToTokenSampleStreamFactory implements ObjectStreamFactory<TokenSample> {
+public class NameToTokenSampleStreamFactory extends DetokenizerSampleStreamFactory<TokenSample> {
- interface Parameters extends NameSampleStreamFactory.Parameters, DetokenizerParameter {
+ interface Parameters extends NameSampleDataStreamFactory.Parameters, DetokenizerParameter {
}
-
- public String getUsage() {
- return ArgumentParser.createUsage(Parameters.class);
+
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(TokenSample.class,
+ "namefinder", new NameToTokenSampleStreamFactory(Parameters.class));
}
- public String validateArguments(String[] args) {
- return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+ protected <P> NameToTokenSampleStreamFactory(Class<P> params) {
+ super(params);
}
public ObjectStream<TokenSample> create(String[] args) {
Parameters params = ArgumentParser.parse(args, Parameters.class);
+ language = params.getLang();
- ObjectStream<NameSample> nameSampleStream = new NameSampleStreamFactory()
- .create(params);
-
- // TODO: Move this to a factory method
- Detokenizer detokenizer;
- try {
- detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(
- new FileInputStream(new File(params.getDetokenizer()))));
- } catch (IOException e) {
- System.err.println("Error while loading detokenizer dict: "
- + e.getMessage());
- throw new TerminateToolException(-1);
- }
-
- return new NameToTokenSampleStream(detokenizer, nameSampleStream);
+ ObjectStream<NameSample> nameSampleStream = StreamFactoryRegistry.getFactory(
+ NameSample.class, StreamFactoryRegistry.DEFAULT_FORMAT).create(
+ ArgumentParser.filter(args, NameSampleDataStreamFactory.Parameters.class));
+ return new NameToTokenSampleStream(createDetokenizer(params), nameSampleStream);
}
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToSentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToSentenceSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToSentenceSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToSentenceSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -17,54 +17,37 @@
package opennlp.tools.formats;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.params.DetokenizerParameter;
import opennlp.tools.postag.POSSample;
import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.tokenize.DetokenizationDictionary;
-import opennlp.tools.tokenize.Detokenizer;
-import opennlp.tools.tokenize.DictionaryDetokenizer;
import opennlp.tools.util.ObjectStream;
/**
* <b>Note:</b> Do not use this class, internal use only!
*/
-public class POSToSentenceSampleStreamFactory implements
- ObjectStreamFactory<SentenceSample> {
+public class POSToSentenceSampleStreamFactory extends DetokenizerSampleStreamFactory<SentenceSample> {
interface Parameters extends WordTagSampleStreamFactory.Parameters, DetokenizerParameter {
}
- public String getUsage() {
- return ArgumentParser.createUsage(Parameters.class);
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(SentenceSample.class,
+ "pos", new POSToSentenceSampleStreamFactory(Parameters.class));
}
- public String validateArguments(String[] args) {
- return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+ protected <P> POSToSentenceSampleStreamFactory(Class<P> params) {
+ super(params);
}
public ObjectStream<SentenceSample> create(String[] args) {
Parameters params = ArgumentParser.parse(args, Parameters.class);
+ language = params.getLang();
- ObjectStream<POSSample> posSampleStream = new WordTagSampleStreamFactory()
- .create(params);
-
- Detokenizer detokenizer;
- try {
- detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(
- new FileInputStream(new File(params.getDetokenizer()))));
- } catch (IOException e) {
- System.err.println("Error while loading detokenizer dict: "
- + e.getMessage());
- throw new TerminateToolException(-1);
- }
-
- return new POSToSentenceSampleStream(detokenizer, posSampleStream, 30);
+ ObjectStream<POSSample> posSampleStream = StreamFactoryRegistry.getFactory(POSSample.class,
+ StreamFactoryRegistry.DEFAULT_FORMAT).create(
+ ArgumentParser.filter(args, WordTagSampleStreamFactory.Parameters.class));
+ return new POSToSentenceSampleStream(createDetokenizer(params), posSampleStream, 30);
}
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToTokenSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToTokenSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToTokenSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/POSToTokenSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -17,54 +17,37 @@
package opennlp.tools.formats;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.params.DetokenizerParameter;
import opennlp.tools.postag.POSSample;
-import opennlp.tools.tokenize.DetokenizationDictionary;
-import opennlp.tools.tokenize.Detokenizer;
-import opennlp.tools.tokenize.DictionaryDetokenizer;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.util.ObjectStream;
/**
* <b>Note:</b> Do not use this class, internal use only!
*/
-public class POSToTokenSampleStreamFactory implements ObjectStreamFactory<TokenSample> {
+public class POSToTokenSampleStreamFactory extends DetokenizerSampleStreamFactory<TokenSample> {
interface Parameters extends WordTagSampleStreamFactory.Parameters, DetokenizerParameter {
}
-
- public String getUsage() {
- return ArgumentParser.createUsage(Parameters.class);
+
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(TokenSample.class,
+ "pos", new POSToTokenSampleStreamFactory(Parameters.class));
}
- public String validateArguments(String[] args) {
- return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+ protected <P> POSToTokenSampleStreamFactory(Class<P> params) {
+ super(params);
}
public ObjectStream<TokenSample> create(String[] args) {
Parameters params = ArgumentParser.parse(args, Parameters.class);
+ language = params.getLang();
- ObjectStream<POSSample> posSampleStream = new WordTagSampleStreamFactory()
- .create(params);
-
- // TODO: Move this to a factory method
- Detokenizer detokenizer;
- try {
- detokenizer = new DictionaryDetokenizer(new DetokenizationDictionary(
- new FileInputStream(new File(params.getDetokenizer()))));
- } catch (IOException e) {
- System.err.println("Error while loading detokenizer dict: "
- + e.getMessage());
- throw new TerminateToolException(-1);
- }
-
- return new POSToTokenSampleStream(detokenizer, posSampleStream);
+ ObjectStream<POSSample> posSampleStream = StreamFactoryRegistry.getFactory(POSSample.class,
+ StreamFactoryRegistry.DEFAULT_FORMAT).create(
+ ArgumentParser.filter(args, WordTagSampleStreamFactory.Parameters.class));
+ return new POSToTokenSampleStream(createDetokenizer(params), posSampleStream);
}
}
Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
+import opennlp.tools.parser.Parse;
+import opennlp.tools.parser.ParseSampleStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+import java.io.FileInputStream;
+
+/**
+ * Factory producing OpenNLP {@link ParseSampleStream}s.
+ */
+public class ParseSampleStreamFactory extends LanguageSampleStreamFactory<Parse> {
+
+ interface Parameters extends LanguageFormatParams {
+ }
+
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(Parse.class,
+ StreamFactoryRegistry.DEFAULT_FORMAT, new ParseSampleStreamFactory(Parameters.class));
+ }
+
+ protected <P> ParseSampleStreamFactory(Class<P> params) {
+ super(params);
+ }
+
+ public ObjectStream<Parse> create(String[] args) {
+ Parameters params = ArgumentParser.parse(args, Parameters.class);
+
+ language = params.getLang();
+
+ CmdLineUtil.checkInputFile("Data", params.getData());
+ FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
+
+ ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn
+ .getChannel(), params.getEncoding());
+
+ return new ParseSampleStream(lineStream);
+ }
+}
\ No newline at end of file
Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
+import opennlp.tools.sentdetect.SentenceSample;
+import opennlp.tools.sentdetect.SentenceSampleStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+import java.io.FileInputStream;
+
+/**
+ * Factory producing OpenNLP {@link SentenceSampleStream}s.
+ */
+public class SentenceSampleStreamFactory extends LanguageSampleStreamFactory<SentenceSample> {
+
+ interface Parameters extends LanguageFormatParams {
+ }
+
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(SentenceSample.class,
+ StreamFactoryRegistry.DEFAULT_FORMAT, new SentenceSampleStreamFactory(Parameters.class));
+ }
+
+ protected <P> SentenceSampleStreamFactory(Class<P> params) {
+ super(params);
+ }
+
+ public ObjectStream<SentenceSample> create(String[] args) {
+ Parameters params = ArgumentParser.parse(args, Parameters.class);
+
+ language = params.getLang();
+
+ CmdLineUtil.checkInputFile("Data", params.getData());
+ FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
+
+ ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
+ params.getEncoding());
+
+ return new SentenceSampleStream(lineStream);
+ }
+}
\ No newline at end of file
Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java?rev=1209220&view=auto
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java (added)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.CmdLineUtil;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
+import opennlp.tools.tokenize.TokenSample;
+import opennlp.tools.tokenize.TokenSampleStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+
+import java.io.FileInputStream;
+
+/**
+ * Factory producing OpenNLP {@link TokenSampleStream}s.
+ */
+public class TokenSampleStreamFactory extends LanguageSampleStreamFactory<TokenSample> {
+
+ interface Parameters extends LanguageFormatParams {
+ }
+
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(TokenSample.class,
+ StreamFactoryRegistry.DEFAULT_FORMAT, new TokenSampleStreamFactory(Parameters.class));
+ }
+
+ protected <P> TokenSampleStreamFactory(Class<P> params) {
+ super(params);
+ }
+
+ public ObjectStream<TokenSample> create(String[] args) {
+ Parameters params = ArgumentParser.parse(args, Parameters.class);
+
+ language = params.getLang();
+
+ CmdLineUtil.checkInputFile("Data", params.getData());
+ FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
+
+ ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
+ params.getEncoding());
+
+ return new TokenSampleStream(lineStream);
+ }
+}
\ No newline at end of file
Propchange: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -17,15 +17,12 @@
package opennlp.tools.formats;
-import java.io.File;
import java.io.InputStreamReader;
-import java.io.UnsupportedEncodingException;
import opennlp.tools.cmdline.ArgumentParser;
-import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
import opennlp.tools.cmdline.CmdLineUtil;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.LanguageFormatParams;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.WordTagSampleStream;
import opennlp.tools.util.ObjectStream;
@@ -34,40 +31,29 @@ import opennlp.tools.util.PlainTextByLin
/**
* <b>Note:</b> Do not use this class, internal use only!
*/
-public class WordTagSampleStreamFactory implements ObjectStreamFactory<POSSample> {
+public class WordTagSampleStreamFactory extends LanguageSampleStreamFactory<POSSample> {
- static interface Parameters {
-
- @ParameterDescription(valueName = "sampleData")
- String getData();
-
- @ParameterDescription(valueName = "charsetName")
- String getEncoding();
- }
-
- public String getUsage() {
- return ArgumentParser.createUsage(Parameters.class);
+ static interface Parameters extends LanguageFormatParams {
}
- public String validateArguments(String[] args) {
- return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
- }
-
- ObjectStream<POSSample> create(Parameters params) {
- ObjectStream<String> lineStream;
- try {
- lineStream = new PlainTextByLineStream(new InputStreamReader(
- CmdLineUtil.openInFile(new File(params.getData())), params.getEncoding()));
-
- return new WordTagSampleStream(lineStream);
- } catch (UnsupportedEncodingException e) {
- System.err.println("Encoding not supported: " + params.getEncoding());
- throw new TerminateToolException(-1);
- }
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(POSSample.class,
+ StreamFactoryRegistry.DEFAULT_FORMAT, new WordTagSampleStreamFactory(Parameters.class));
}
+ protected <P> WordTagSampleStreamFactory(Class<P> params) {
+ super(params);
+ }
+
public ObjectStream<POSSample> create(String[] args) {
Parameters params = ArgumentParser.parse(args, Parameters.class);
- return create(params);
+ language = params.getLang();
+
+ CmdLineUtil.checkInputFile("Data", params.getData());
+ ObjectStream<String> lineStream;
+ lineStream = new PlainTextByLineStream(new InputStreamReader(
+ CmdLineUtil.openInFile(params.getData()), params.getEncoding()));
+
+ return new WordTagSampleStream(lineStream);
}
-}
+}
\ No newline at end of file
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -25,8 +25,8 @@ import opennlp.tools.cmdline.ArgumentPar
import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
import opennlp.tools.cmdline.CmdLineUtil;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.formats.LanguageSampleStreamFactory;
import opennlp.tools.util.ObjectStream;
/**
@@ -35,16 +35,21 @@ import opennlp.tools.util.ObjectStream;
* <p>
* <b>Note:</b> Do not use this class, internal use only!
*/
-public class ADChunkSampleStreamFactory implements
- ObjectStreamFactory<ChunkSample> {
+public class ADChunkSampleStreamFactory extends LanguageSampleStreamFactory<ChunkSample> {
interface Parameters {
- @ParameterDescription(valueName = "encoding")
+ //all have to be repeated, because encoding is not optional,
+ //according to the check if (encoding == null) { below (now removed)
+ @ParameterDescription(valueName = "charsetName",
+ description = "encoding for reading and writing text, if absent the system default is used.")
Charset getEncoding();
- @ParameterDescription(valueName = "sampleData")
- String getData();
-
+ @ParameterDescription(valueName = "sampleData", description = "data to be used, usually a file name.")
+ File getData();
+
+ @ParameterDescription(valueName = "language", description = "language which is being processed.")
+ String getLang();
+
@ParameterDescription(valueName = "start", description = "index of first sentence")
@OptionalParameter
Integer getStart();
@@ -54,26 +59,25 @@ public class ADChunkSampleStreamFactory
Integer getEnd();
}
- public String getUsage() {
- return ArgumentParser.createUsage(Parameters.class);
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(ChunkSample.class,
+ "ad", new ADChunkSampleStreamFactory(Parameters.class));
}
- public String validateArguments(String[] args) {
- return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+ protected <P> ADChunkSampleStreamFactory(Class<P> params) {
+ super(params);
}
public ObjectStream<ChunkSample> create(String[] args) {
Parameters params = ArgumentParser.parse(args, Parameters.class);
+ language = params.getLang();
+
Charset encoding = params.getEncoding();
- if (encoding == null) {
- throw new TerminateToolException(1);
- }
-
- ADChunkSampleStream sampleStream = new ADChunkSampleStream(CmdLineUtil.openInFile(new File(params
- .getData())), encoding.name());
+ ADChunkSampleStream sampleStream =
+ new ADChunkSampleStream(CmdLineUtil.openInFile(params.getData()), encoding.name());
if(params.getStart() != null && params.getStart() > -1) {
sampleStream.setStart(params.getStart());
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java Thu Dec 1 20:08:25 2011
@@ -23,47 +23,50 @@ import java.nio.charset.Charset;
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
import opennlp.tools.cmdline.CmdLineUtil;
-import opennlp.tools.cmdline.ObjectStreamFactory;
-import opennlp.tools.cmdline.TerminateToolException;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.formats.LanguageSampleStreamFactory;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.ObjectStream;
/**
- * A Factory to create a Arvores Deitadas NameSampleStream from the command line
+ * A Factory to create a Arvores Deitadas NameSampleDataStream from the command line
* utility.
* <p>
* <b>Note:</b> Do not use this class, internal use only!
*/
-public class ADNameSampleStreamFactory implements
- ObjectStreamFactory<NameSample> {
+public class ADNameSampleStreamFactory extends LanguageSampleStreamFactory<NameSample> {
interface Parameters {
- @ParameterDescription(valueName = "encoding")
+ //all have to be repeated, because encoding is not optional,
+ //according to the check if (encoding == null) { below (now removed)
+ @ParameterDescription(valueName = "charsetName",
+ description = "encoding for reading and writing text, if absent the system default is used.")
Charset getEncoding();
- @ParameterDescription(valueName = "sampleData")
- String getData();
+ @ParameterDescription(valueName = "sampleData", description = "data to be used, usually a file name.")
+ File getData();
+
+ @ParameterDescription(valueName = "language", description = "language which is being processed.")
+ String getLang();
}
- public String getUsage() {
- return ArgumentParser.createUsage(Parameters.class);
+ public static void registerFactory() {
+ StreamFactoryRegistry.registerFactory(NameSample.class,
+ "ad", new ADNameSampleStreamFactory(Parameters.class));
}
- public String validateArguments(String[] args) {
- return ArgumentParser.validateArgumentsLoudly(args, Parameters.class);
+ protected <P> ADNameSampleStreamFactory(Class<P> params) {
+ super(params);
}
public ObjectStream<NameSample> create(String[] args) {
Parameters params = ArgumentParser.parse(args, Parameters.class);
- Charset encoding = params.getEncoding();
+ language = params.getLang();
- if (encoding == null) {
- throw new TerminateToolException(1);
- }
+ Charset encoding = params.getEncoding();
- return new ADNameSampleStream(CmdLineUtil.openInFile(new File(params
- .getData())), encoding.name());
+ return new ADNameSampleStream(CmdLineUtil.openInFile(params.getData()), encoding.name());
}
}
Modified: incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/cmdline/CLITest.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/cmdline/CLITest.java?rev=1209220&r1=1209219&r2=1209220&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/cmdline/CLITest.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/cmdline/CLITest.java Thu Dec 1 20:08:25 2011
@@ -76,15 +76,45 @@ public class CLITest {
try {
CLI.main(new String[]{});
} catch (ExitException e) {
+ assertEquals(0, e.status());
+ }
+ }
+
+ /**
+ * Ensure the main method prints error and returns 1.
+ */
+ @Test
+ public void testUnknownToolMessage() {
+ try {
+ CLI.main(new String[]{"unknown name"});
+ } catch (ExitException e) {
assertEquals(1, e.status());
}
-
+ }
+
+ /**
+ * Ensure the tool checks the parameter and returns 1.
+ */
+ @Test
+ public void testToolParameterMessage() {
try {
- CLI.main(new String[]{"unkown name"});
+ CLI.main(new String[]{"DoccatTrainer", "-param", "value"});
} catch (ExitException e) {
assertEquals(1, e.status());
}
}
+
+ /**
+ * Ensure the main method prints error and returns -1
+ */
+ @Test
+ public void testUnknownFileMessage() {
+ try {
+ CLI.main(new String[]{"Doccat", "unknown.model"});
+ } catch (ExitException e) {
+ assertEquals(-1, e.status());
+ }
+ }
/**
@@ -97,7 +127,7 @@ public class CLITest {
try {
CLI.main(new String[]{toolName, "help"});
} catch (ExitException e) {
- assertEquals(1, e.status());
+ assertEquals(0, e.status());
}
}
}