You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by ma...@apache.org on 2014/02/04 18:10:12 UTC
svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/
main/java/opennlp/tools/cmdline/doccat/
main/java/opennlp/tools/cmdline/namefind/ main/java/opennlp/tools/cmdlin...
Author: markg
Date: Tue Feb 4 17:10:11 2014
New Revision: 1564379
URL: http://svn.apache.org/r1564379
Log:
OPENNLP-600
Changed to MockInputStreamFactory everywhere except where a reader was being used in the PlainTextByLineStream constructor.
Added:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/MockInputStreamFactory.java
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/BeamSearch.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/InputStreamFactory.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkSampleTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerDetailedFMeasureListenerTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerEvaluatorTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADChunkSampleStreamTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADNameSampleStreamTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADPOSSampleStreamTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADParagraphStreamTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADSentenceSampleStreamTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/DictionaryNameFinderEvaluatorTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/NameSampleDataStreamTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/parser/ParseSampleStreamTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java
opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/MarkableFileInputStream.java Tue Feb 4 17:10:11 2014
@@ -36,7 +36,7 @@ public class MarkableFileInputStream ext
MarkableFileInputStream(File file) throws FileNotFoundException {
in = new FileInputStream(file);
}
-
+
@Override
public synchronized void mark(int readlimit) {
try {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/ChunkerMETool.java Tue Feb 4 17:10:11 2014
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.cmdline.chunker;
import java.io.File;
@@ -30,6 +29,7 @@ import opennlp.tools.cmdline.CmdLineUtil
import opennlp.tools.cmdline.PerformanceMonitor;
import opennlp.tools.postag.POSSample;
import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -38,7 +38,7 @@ public class ChunkerMETool extends Basic
public String getShortDescription() {
return "learnable chunker";
}
-
+
public String getHelp() {
return "Usage: " + CLI.CMD + " " + getName() + " model < sentences";
}
@@ -51,13 +51,12 @@ public class ChunkerMETool extends Basic
ChunkerME chunker = new ChunkerME(model, ChunkerME.DEFAULT_BEAM_SIZE);
- ObjectStream<String> lineStream =
- new PlainTextByLineStream(new InputStreamReader(System.in));
-
- PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
- perfMon.start();
+ ObjectStream<String> lineStream = null;
+ PerformanceMonitor perfMon = null;
try {
+ lineStream = new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
+ perfMon = new PerformanceMonitor(System.err, "sent");
String line;
while ((line = lineStream.read()) != null) {
@@ -71,15 +70,14 @@ public class ChunkerMETool extends Basic
}
String[] chunks = chunker.chunk(posSample.getSentence(),
- posSample.getTags());
+ posSample.getTags());
System.out.println(new ChunkSample(posSample.getSentence(),
- posSample.getTags(), chunks).nicePrint());
+ posSample.getTags(), chunks).nicePrint());
perfMon.incrementCounter();
}
- }
- catch (IOException e) {
+ } catch (IOException e) {
CmdLineUtil.handleStdinIoError(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTool.java Tue Feb 4 17:10:11 2014
@@ -14,12 +14,10 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.cmdline.doccat;
import java.io.File;
import java.io.IOException;
-import java.io.InputStreamReader;
import opennlp.tools.cmdline.BasicCmdLineTool;
import opennlp.tools.cmdline.CLI;
@@ -32,19 +30,23 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ParagraphStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.tokenize.WhitespaceTokenizer;
+import opennlp.tools.util.MockInputStreamFactory;
public class DoccatTool extends BasicCmdLineTool {
+ @Override
public String getShortDescription() {
return "learnable document categorizer";
}
-
+
+ @Override
public String getHelp() {
return "Usage: " + CLI.CMD + " " + getName() + " model < documents";
}
+ @Override
public void run(String[] args) {
-
+
if (0 == args.length) {
System.out.println(getHelp());
} else {
@@ -53,13 +55,21 @@ public class DoccatTool extends BasicCmd
DocumentCategorizerME doccat = new DocumentCategorizerME(model);
- ObjectStream<String> documentStream = new ParagraphStream(
- new PlainTextByLineStream(new InputStreamReader(System.in)));
+ //ObjectStream<String> documentStream = new ParagraphStream(
+ // new PlainTextByLineStream(new InputStreamReader(System.in)));
+ /**
+ * moved initialization to the try block to catch new IOException
+ */
+ ObjectStream<String> documentStream;
+
+
PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "doc");
perfMon.start();
try {
+ documentStream = new ParagraphStream(
+ new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8"));
String document;
while ((document = documentStream.read()) != null) {
double prob[] = doccat.categorize(WhitespaceTokenizer.INSTANCE.tokenize(document));
@@ -70,8 +80,7 @@ public class DoccatTool extends BasicCmd
perfMon.incrementCounter();
}
- }
- catch (IOException e) {
+ } catch (IOException e) {
CmdLineUtil.handleStdinIoError(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java Tue Feb 4 17:10:11 2014
@@ -40,10 +40,12 @@ public class DoccatTrainerTool
super(DocumentSample.class, TrainerToolParams.class);
}
+ @Override
public String getShortDescription() {
return "trainer for the learnable document categorizer";
}
+ @Override
public void run(String format, String[] args) {
super.run(format, args);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTool.java Tue Feb 4 17:10:11 2014
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.cmdline.namefind;
import java.io.File;
@@ -33,6 +32,7 @@ import opennlp.tools.namefind.NameSample
import opennlp.tools.namefind.TokenNameFinder;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.tokenize.WhitespaceTokenizer;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
@@ -42,17 +42,17 @@ public final class TokenNameFinderTool e
public String getShortDescription() {
return "learnable name finder";
}
-
+
public String getHelp() {
return "Usage: " + CLI.CMD + " " + getName() + " model1 model2 ... modelN < sentences";
}
-
+
public void run(String[] args) {
-
+
if (args.length == 0) {
System.out.println(getHelp());
} else {
-
+
NameFinderME nameFinders[] = new NameFinderME[args.length];
for (int i = 0; i < nameFinders.length; i++) {
@@ -60,15 +60,17 @@ public final class TokenNameFinderTool e
nameFinders[i] = new NameFinderME(model);
}
- ObjectStream<String> untokenizedLineStream =
- new PlainTextByLineStream(new InputStreamReader(System.in));
-
+// ObjectStream<String> untokenizedLineStream =
+// new PlainTextByLineStream(new InputStreamReader(System.in));
+ ObjectStream<String> untokenizedLineStream;
PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
perfMon.start();
try {
+ untokenizedLineStream =
+ new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
String line;
- while((line = untokenizedLineStream.read()) != null) {
+ while ((line = untokenizedLineStream.read()) != null) {
String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
// A new line indicates a new document,
@@ -89,17 +91,16 @@ public final class TokenNameFinderTool e
// Simple way to drop intersecting spans, otherwise the
// NameSample is invalid
Span reducedNames[] = NameFinderME.dropOverlappingSpans(
- names.toArray(new Span[names.size()]));
+ names.toArray(new Span[names.size()]));
NameSample nameSample = new NameSample(whitespaceTokenizerLine,
- reducedNames, false);
+ reducedNames, false);
System.out.println(nameSample.toString());
perfMon.incrementCounter();
}
- }
- catch (IOException e) {
+ } catch (IOException e) {
CmdLineUtil.handleStdinIoError(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Tue Feb 4 17:10:11 2014
@@ -24,10 +24,8 @@ import java.util.HashMap;
import java.util.Map;
import opennlp.tools.cmdline.AbstractTrainerTool;
-import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
-import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
import opennlp.tools.cmdline.namefind.TokenNameFinderTrainerTool.TrainerToolParams;
import opennlp.tools.cmdline.params.TrainingToolParams;
import opennlp.tools.namefind.NameSample;
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTool.java Tue Feb 4 17:10:11 2014
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.cmdline.parser;
import java.io.File;
@@ -34,6 +33,7 @@ import opennlp.tools.parser.AbstractBott
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.ParserFactory;
import opennlp.tools.parser.ParserModel;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
@@ -43,14 +43,13 @@ public final class ParserTool extends Ba
public String getShortDescription() {
return "performs full syntactic parsing";
}
-
+
public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName() + " [-bs n -ap n -k n] model < sentences \n" +
- "-bs n: Use a beam size of n.\n" +
- "-ap f: Advance outcomes in with at least f% of the probability mass.\n" +
- "-k n: Show the top n parses. This will also display their log-probablities.";
+ return "Usage: " + CLI.CMD + " " + getName() + " [-bs n -ap n -k n] model < sentences \n"
+ + "-bs n: Use a beam size of n.\n"
+ + "-ap f: Advance outcomes in with at least f% of the probability mass.\n"
+ + "-k n: Show the top n parses. This will also display their log-probablities.";
}
-
private static Pattern untokenizedParenPattern1 = Pattern.compile("([^ ])([({)}])");
private static Pattern untokenizedParenPattern2 = Pattern.compile("([({)}])([^ ])");
@@ -68,70 +67,69 @@ public final class ParserTool extends Ba
String text = sb.substring(0, sb.length() - 1);
Parse p = new Parse(text, new Span(0, text.length()), AbstractBottomUpParser.INC_NODE, 0, 0);
int start = 0;
- int i=0;
- for (Iterator<String> ti = tokens.iterator(); ti.hasNext();i++) {
+ int i = 0;
+ for (Iterator<String> ti = tokens.iterator(); ti.hasNext(); i++) {
String tok = ti.next();
- p.insert(new Parse(text, new Span(start, start + tok.length()), AbstractBottomUpParser.TOK_NODE, 0,i));
+ p.insert(new Parse(text, new Span(start, start + tok.length()), AbstractBottomUpParser.TOK_NODE, 0, i));
start += tok.length() + 1;
}
Parse[] parses;
if (numParses == 1) {
- parses = new Parse[] { parser.parse(p)};
- }
- else {
- parses = parser.parse(p,numParses);
+ parses = new Parse[]{parser.parse(p)};
+ } else {
+ parses = parser.parse(p, numParses);
}
return parses;
}
-
+
public void run(String[] args) {
-
+
if (args.length < 1) {
System.out.println(getHelp());
} else {
-
+
ParserModel model = new ParserModelLoader().load(new File(args[args.length - 1]));
Integer beamSize = CmdLineUtil.getIntParameter("-bs", args);
- if (beamSize == null)
- beamSize = AbstractBottomUpParser.defaultBeamSize;
+ if (beamSize == null) {
+ beamSize = AbstractBottomUpParser.defaultBeamSize;
+ }
Integer numParses = CmdLineUtil.getIntParameter("-k", args);
boolean showTopK;
if (numParses == null) {
numParses = 1;
showTopK = false;
- }
- else {
+ } else {
showTopK = true;
}
Double advancePercentage = CmdLineUtil.getDoubleParameter("-ap", args);
- if (advancePercentage == null)
+ if (advancePercentage == null) {
advancePercentage = AbstractBottomUpParser.defaultAdvancePercentage;
+ }
opennlp.tools.parser.Parser parser =
- ParserFactory.create(model, beamSize, advancePercentage);
+ ParserFactory.create(model, beamSize, advancePercentage);
- ObjectStream<String> lineStream =
- new PlainTextByLineStream(new InputStreamReader(System.in));
-
- PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
- perfMon.start();
+ ObjectStream<String> lineStream = null;
+ PerformanceMonitor perfMon = null;
try {
+ lineStream = new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
+ perfMon = new PerformanceMonitor(System.err, "sent");
+ perfMon.start();
String line;
while ((line = lineStream.read()) != null) {
if (line.length() == 0) {
System.out.println();
- }
- else {
+ } else {
Parse[] parses = parseLine(line, parser, numParses);
- for (int pi=0,pn=parses.length;pi<pn;pi++) {
+ for (int pi = 0, pn = parses.length; pi < pn; pi++) {
if (showTopK) {
- System.out.print(pi+" "+parses[pi].getProb()+" ");
+ System.out.print(pi + " " + parses[pi].getProb() + " ");
}
parses[pi].show();
@@ -140,8 +138,7 @@ public final class ParserTool extends Ba
}
}
}
- }
- catch (IOException e) {
+ } catch (IOException e) {
CmdLineUtil.handleStdinIoError(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java Tue Feb 4 17:10:11 2014
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.cmdline.postag;
import java.io.File;
@@ -29,6 +28,7 @@ import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.tokenize.WhitespaceTokenizer;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -37,28 +37,28 @@ public final class POSTaggerTool extends
public String getShortDescription() {
return "learnable part of speech tagger";
}
-
+
public String getHelp() {
return "Usage: " + CLI.CMD + " " + getName() + " model < sentences";
}
public void run(String[] args) {
-
+
if (args.length != 1) {
System.out.println(getHelp());
} else {
-
+
POSModel model = new POSModelLoader().load(new File(args[0]));
POSTaggerME tagger = new POSTaggerME(model);
- ObjectStream<String> lineStream =
- new PlainTextByLineStream(new InputStreamReader(System.in));
-
- PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
- perfMon.start();
+ ObjectStream<String> lineStream = null;
+ PerformanceMonitor perfMon = null;
try {
+ lineStream = new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
+ perfMon = new PerformanceMonitor(System.err, "sent");
+ perfMon.start();
String line;
while ((line = lineStream.read()) != null) {
@@ -70,8 +70,7 @@ public final class POSTaggerTool extends
perfMon.incrementCounter();
}
- }
- catch (IOException e) {
+ } catch (IOException e) {
CmdLineUtil.handleStdinIoError(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorTool.java Tue Feb 4 17:10:11 2014
@@ -27,6 +27,7 @@ import opennlp.tools.cmdline.CmdLineUtil
import opennlp.tools.cmdline.PerformanceMonitor;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.ParagraphStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -59,13 +60,12 @@ public final class SentenceDetectorTool
SentenceDetectorME sdetector = new SentenceDetectorME(model);
- ObjectStream<String> paraStream =
- new ParagraphStream(new PlainTextByLineStream(new InputStreamReader(System.in)));
-
- PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
- perfMon.start();
+ ObjectStream<String> paraStream = null;
+ PerformanceMonitor perfMon = null;
try {
+ paraStream = new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
+ perfMon = new PerformanceMonitor(System.err, "sent");
String para;
while ((para = paraStream.read()) != null) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java Tue Feb 4 17:10:11 2014
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.cmdline.tokenizer;
import java.io.IOException;
@@ -25,39 +24,43 @@ import opennlp.tools.cmdline.Performance
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerStream;
import opennlp.tools.tokenize.WhitespaceTokenStream;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
final class CommandLineTokenizer {
private final Tokenizer tokenizer;
-
+
CommandLineTokenizer(Tokenizer tokenizer) {
this.tokenizer = tokenizer;
}
-
+
void process() {
-
- ObjectStream<String> untokenizedLineStream =
- new PlainTextByLineStream(new InputStreamReader(System.in));
-
- ObjectStream<String> tokenizedLineStream = new WhitespaceTokenStream(
- new TokenizerStream(tokenizer, untokenizedLineStream));
-
- PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
- perfMon.start();
-
+ ObjectStream<String> untokenizedLineStream = null;
+
+ ObjectStream<String> tokenizedLineStream = null;
+ PerformanceMonitor perfMon = null;
try {
+ untokenizedLineStream =
+ new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
+
+ tokenizedLineStream = new WhitespaceTokenStream(
+ new TokenizerStream(tokenizer, untokenizedLineStream));
+
+ perfMon = new PerformanceMonitor(System.err, "sent");
+ perfMon.start();
+
+
String tokenizedLine;
while ((tokenizedLine = tokenizedLineStream.read()) != null) {
System.out.println(tokenizedLine);
perfMon.incrementCounter();
}
- }
- catch (IOException e) {
+ } catch (IOException e) {
CmdLineUtil.handleStdinIoError(e);
}
-
+
perfMon.stopAndPrintFinalResult();
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/DictionaryDetokenizerTool.java Tue Feb 4 17:10:11 2014
@@ -28,6 +28,7 @@ import opennlp.tools.cmdline.Performance
import opennlp.tools.tokenize.Detokenizer;
import opennlp.tools.tokenize.DictionaryDetokenizer;
import opennlp.tools.tokenize.WhitespaceTokenizer;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -43,17 +44,17 @@ public final class DictionaryDetokenizer
if (args.length != 1) {
System.out.println(getHelp());
} else {
-
+ try {
Detokenizer detokenizer = new DictionaryDetokenizer(
new DetokenizationDictionaryLoader().load(new File(args[0])));
ObjectStream<String> tokenizedLineStream =
- new PlainTextByLineStream(new InputStreamReader(System.in));
+ new PlainTextByLineStream(new MockInputStreamFactory(System.in),"UTF-8");
PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
perfMon.start();
- try {
+
String tokenizedLine;
while ((tokenizedLine = tokenizedLineStream.read()) != null) {
@@ -64,12 +65,13 @@ public final class DictionaryDetokenizer
perfMon.incrementCounter();
}
+ perfMon.stopAndPrintFinalResult();
}
catch (IOException e) {
CmdLineUtil.handleStdinIoError(e);
}
- perfMon.stopAndPrintFinalResult();
+
}
}
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/BagOfWordsFeatureGenerator.java Tue Feb 4 17:10:11 2014
@@ -37,6 +37,7 @@ public class BagOfWordsFeatureGenerator
this.useOnlyAllLetterTokens = useOnlyAllLetterTokens;
}
+ @Override
public Collection<String> extractFeatures(String[] text) {
Collection<String> bagOfWords = new ArrayList<String>(text.length);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DocumentCategorizerME.java Tue Feb 4 17:10:11 2014
@@ -23,11 +23,8 @@ import java.io.ObjectStreamException;
import java.util.HashMap;
import java.util.Map;
-import opennlp.tools.ml.maxent.GIS;
-import opennlp.tools.ml.model.AbstractModel;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.ml.model.TrainUtil;
-import opennlp.tools.ml.model.TwoPassDataIndexer;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.ObjectStream;
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/BioNLP2004NameSampleStream.java Tue Feb 4 17:10:11 2014
@@ -25,6 +25,7 @@ import java.util.ArrayList;
import java.util.List;
import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
@@ -54,9 +55,9 @@ public class BioNLP2004NameSampleStream
public BioNLP2004NameSampleStream(InputStream in, int types) {
try {
- this.lineStream = new PlainTextByLineStream(in, "UTF-8");
+ this.lineStream = new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8");
System.setOut(new PrintStream(System.out, true, "UTF-8"));
- } catch (UnsupportedEncodingException e) {
+ } catch (IOException e) {
// UTF-8 is available on all JVMs, will never happen
throw new IllegalStateException(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ChunkerSampleStreamFactory.java Tue Feb 4 17:10:11 2014
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.formats;
import opennlp.tools.chunker.ChunkSample;
@@ -27,6 +26,10 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.tools.util.MockInputStreamFactory;
/**
* Factory producing OpenNLP {@link ChunkSampleStream}s.
@@ -38,7 +41,7 @@ public class ChunkerSampleStreamFactory
public static void registerFactory() {
StreamFactoryRegistry.registerFactory(ChunkSample.class,
- StreamFactoryRegistry.DEFAULT_FORMAT, new ChunkerSampleStreamFactory(Parameters.class));
+ StreamFactoryRegistry.DEFAULT_FORMAT, new ChunkerSampleStreamFactory(Parameters.class));
}
protected <P> ChunkerSampleStreamFactory(Class<P> params) {
@@ -50,9 +53,13 @@ public class ChunkerSampleStreamFactory
CmdLineUtil.checkInputFile("Data", params.getData());
FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
-
- ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn
- .getChannel(), params.getEncoding());
+ ObjectStream<String> lineStream = null;
+ try {
+ lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn), params.getEncoding());
+
+ } catch (IOException ex) {
+ Logger.getLogger(ChunkerSampleStreamFactory.class.getName()).log(Level.SEVERE, null, ex);
+ }
return new ChunkSampleStream(lineStream);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java Tue Feb 4 17:10:11 2014
@@ -26,6 +26,7 @@ import java.util.List;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
@@ -80,9 +81,9 @@ public class Conll02NameSampleStream imp
this.lang = lang;
try {
- this.lineStream = new PlainTextByLineStream(in, "UTF-8");
+ this.lineStream = new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8");
System.setOut(new PrintStream(System.out, true, "UTF-8"));
- } catch (UnsupportedEncodingException e) {
+ } catch (IOException e) {
// UTF-8 is available on all JVMs, will never happen
throw new IllegalStateException(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStream.java Tue Feb 4 17:10:11 2014
@@ -25,6 +25,7 @@ import java.util.ArrayList;
import java.util.List;
import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
@@ -67,9 +68,9 @@ public class Conll03NameSampleStream imp
this.lang = lang;
try {
- this.lineStream = new PlainTextByLineStream(in, "UTF-8");
+ this.lineStream = new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8");
System.setOut(new PrintStream(System.out, true, "UTF-8"));
- } catch (UnsupportedEncodingException e) {
+ } catch (IOException e) {
// UTF-8 is available on all JVMs, will never happen
throw new IllegalStateException(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/DocumentSampleStreamFactory.java Tue Feb 4 17:10:11 2014
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.formats;
import opennlp.tools.cmdline.ArgumentParser;
@@ -27,6 +26,10 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.tools.util.MockInputStreamFactory;
/**
* Factory producing OpenNLP {@link DocumentSampleStream}s.
@@ -38,7 +41,7 @@ public class DocumentSampleStreamFactory
public static void registerFactory() {
StreamFactoryRegistry.registerFactory(DocumentSample.class,
- StreamFactoryRegistry.DEFAULT_FORMAT, new DocumentSampleStreamFactory(Parameters.class));
+ StreamFactoryRegistry.DEFAULT_FORMAT, new DocumentSampleStreamFactory(Parameters.class));
}
protected <P> DocumentSampleStreamFactory(Class<P> params) {
@@ -50,9 +53,16 @@ public class DocumentSampleStreamFactory
CmdLineUtil.checkInputFile("Data", params.getData());
FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
-
- ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
- params.getEncoding());
+ ObjectStream<String> lineStream=null;
+ try {
+ lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
+ params.getEncoding());
+ // params.getEncoding());
+ // ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
+ // params.getEncoding());
+ } catch (IOException ex) {
+ throw new RuntimeException(ex);
+ }
return new DocumentSampleStream(lineStream);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/EvalitaNameSampleStream.java Tue Feb 4 17:10:11 2014
@@ -26,6 +26,7 @@ import java.util.List;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
@@ -87,9 +88,9 @@ public class EvalitaNameSampleStream imp
this.lang = lang;
try {
- this.lineStream = new PlainTextByLineStream(in, "UTF-8");
+ this.lineStream = new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8");
System.setOut(new PrintStream(System.out, true, "UTF-8"));
- } catch (UnsupportedEncodingException e) {
+ } catch (IOException e) {
// UTF-8 is available on all JVMs, will never happen
throw new IllegalStateException(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/LeipzigDoccatSampleStream.java Tue Feb 4 17:10:11 2014
@@ -24,6 +24,7 @@ import java.io.PrintStream;
import opennlp.tools.doccat.DocumentSample;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.PlainTextByLineStream;
/**
@@ -52,7 +53,7 @@ public class LeipzigDoccatSampleStream e
*/
LeipzigDoccatSampleStream(String language, int sentencesPerDocument,
InputStream in) throws IOException {
- super(new PlainTextByLineStream(in, "UTF-8"));
+ super(new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8"));
System.setOut(new PrintStream(System.out, true, "UTF-8"));
this.language = language;
this.sentencesPerDocument = sentencesPerDocument;
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameFinderCensus90NameStream.java Tue Feb 4 17:10:11 2014
@@ -12,13 +12,15 @@
* limitations under the License.
* under the License.
*/
-
package opennlp.tools.formats;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Locale;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -27,10 +29,10 @@ import opennlp.tools.util.StringUtil;
/**
* This class helps to read the US Census data from the files to build a
- * StringList for each dictionary entry in the name-finder dictionary.
- * The entries in the source file are as follows:
+ * StringList for each dictionary entry in the name-finder dictionary. The
+ * entries in the source file are as follows:
* <p>
- * SMITH 1.006 1.006 1
+ * SMITH 1.006 1.006 1
* <p>
* <ul>
* <li>The first field is the name (in ALL CAPS).
@@ -45,14 +47,14 @@ public class NameFinderCensus90NameStrea
private final Locale locale;
private final Charset encoding;
- private final ObjectStream<String> lineStream;
+ private ObjectStream<String> lineStream;
/**
* This constructor takes an ObjectStream and initializes the class to handle
* the stream.
*
- * @param lineStream an <code>ObjectSteam<String></code> that represents the
- * input file to be attached to this class.
+ * @param lineStream an <code>ObjectSteam<String></code> that represents the
+ * input file to be attached to this class.
*/
public NameFinderCensus90NameStream(ObjectStream<String> lineStream) {
this.locale = new Locale("en"); // locale is English
@@ -62,24 +64,32 @@ public class NameFinderCensus90NameStrea
}
/**
- * This constructor takes an <code>InputStream</code> and a <code>Charset</code>
- * and opens an associated stream object with the specified encoding specified.
+ * This constructor takes an
+ * <code>InputStream</code> and a
+ * <code>Charset</code> and opens an associated stream object with the
+ * specified encoding specified.
*
- * @param in an <code>InputStream</code> for the input file.
- * @param encoding the <code>Charset</code> to apply to the input stream.
+ * @param in an <code>InputStream</code> for the input file.
+ * @param encoding the <code>Charset</code> to apply to the input stream.
*/
public NameFinderCensus90NameStream(InputStream in, Charset encoding) {
this.locale = new Locale("en"); // locale is English
this.encoding = encoding;
- this.lineStream = new PlainTextByLineStream(in, this.encoding);
+
+ try {
+ this.lineStream = new PlainTextByLineStream(new MockInputStreamFactory(in), this.encoding);
+ } catch (IOException ex) {
+
+ throw new RuntimeException(ex);
+ }
}
public StringList read() throws IOException {
String line = lineStream.read();
StringList name = null;
- if ((line != null) &&
- (!StringUtil.isEmpty(line))) {
+ if ((line != null)
+ && (!StringUtil.isEmpty(line))) {
String name2;
// find the location of the name separator in the line of data.
int pos = line.indexOf(' ');
@@ -87,15 +97,15 @@ public class NameFinderCensus90NameStrea
String parsed = line.substring(0, pos);
// the data is in ALL CAPS ... so the easiest way is to convert
// back to standard mixed case.
- if ((parsed.length() > 2) &&
- (parsed.startsWith("MC"))) {
- name2 = parsed.substring(0,1).toUpperCase(locale) +
- parsed.substring(1,2).toLowerCase(locale) +
- parsed.substring(2,3).toUpperCase(locale) +
- parsed.substring(3).toLowerCase(locale);
+ if ((parsed.length() > 2)
+ && (parsed.startsWith("MC"))) {
+ name2 = parsed.substring(0, 1).toUpperCase(locale)
+ + parsed.substring(1, 2).toLowerCase(locale)
+ + parsed.substring(2, 3).toUpperCase(locale)
+ + parsed.substring(3).toLowerCase(locale);
} else {
- name2 = parsed.substring(0,1).toUpperCase(locale) +
- parsed.substring(1).toLowerCase(locale);
+ name2 = parsed.substring(0, 1).toUpperCase(locale)
+ + parsed.substring(1).toLowerCase(locale);
}
name = new StringList(new String[]{name2});
}
@@ -111,5 +121,4 @@ public class NameFinderCensus90NameStrea
public void close() throws IOException {
lineStream.close();
}
-
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/NameSampleDataStreamFactory.java Tue Feb 4 17:10:11 2014
@@ -14,10 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.formats;
import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.CmdLineUtil;
@@ -25,6 +27,7 @@ import opennlp.tools.cmdline.StreamFacto
import opennlp.tools.cmdline.params.BasicFormatParams;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.namefind.NameSampleDataStream;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -38,7 +41,7 @@ public class NameSampleDataStreamFactory
public static void registerFactory() {
StreamFactoryRegistry.registerFactory(NameSample.class,
- StreamFactoryRegistry.DEFAULT_FORMAT, new NameSampleDataStreamFactory(Parameters.class));
+ StreamFactoryRegistry.DEFAULT_FORMAT, new NameSampleDataStreamFactory(Parameters.class));
}
protected <P> NameSampleDataStreamFactory(Class<P> params) {
@@ -49,11 +52,16 @@ public class NameSampleDataStreamFactory
Parameters params = ArgumentParser.parse(args, Parameters.class);
CmdLineUtil.checkInputFile("Data", params.getData());
-
+
FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
- ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
- params.getEncoding());
+ ObjectStream<String> lineStream = null;
+ try {
+ lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
+ params.getEncoding());
+ } catch (IOException ex) {
+ throw new RuntimeException(ex);
+ }
return new NameSampleDataStream(lineStream);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ParseSampleStreamFactory.java Tue Feb 4 17:10:11 2014
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.formats;
import opennlp.tools.cmdline.ArgumentParser;
@@ -27,6 +26,10 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.tools.util.MockInputStreamFactory;
/**
* Factory producing OpenNLP {@link ParseSampleStream}s.
@@ -38,7 +41,7 @@ public class ParseSampleStreamFactory ex
public static void registerFactory() {
StreamFactoryRegistry.registerFactory(Parse.class,
- StreamFactoryRegistry.DEFAULT_FORMAT, new ParseSampleStreamFactory(Parameters.class));
+ StreamFactoryRegistry.DEFAULT_FORMAT, new ParseSampleStreamFactory(Parameters.class));
}
protected <P> ParseSampleStreamFactory(Class<P> params) {
@@ -51,8 +54,12 @@ public class ParseSampleStreamFactory ex
CmdLineUtil.checkInputFile("Data", params.getData());
FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
- ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn
- .getChannel(), params.getEncoding());
+ ObjectStream<String> lineStream = null;
+ try {
+ lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn), params.getEncoding());
+ } catch (IOException ex) {
+ throw new RuntimeException(ex);
+ }
return new ParseSampleStream(lineStream);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/SentenceSampleStreamFactory.java Tue Feb 4 17:10:11 2014
@@ -27,6 +27,10 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.tools.util.MockInputStreamFactory;
/**
* Factory producing OpenNLP {@link SentenceSampleStream}s.
@@ -51,8 +55,13 @@ public class SentenceSampleStreamFactory
CmdLineUtil.checkInputFile("Data", params.getData());
FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
- ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
- params.getEncoding());
+ ObjectStream<String> lineStream=null;
+ try {
+ lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
+params.getEncoding());
+ } catch (IOException ex) {
+ Logger.getLogger(SentenceSampleStreamFactory.class.getName()).log(Level.SEVERE, null, ex);
+ }
return new SentenceSampleStream(lineStream);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/TokenSampleStreamFactory.java Tue Feb 4 17:10:11 2014
@@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.formats;
import opennlp.tools.cmdline.ArgumentParser;
@@ -27,6 +26,10 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import opennlp.tools.util.MockInputStreamFactory;
/**
* Factory producing OpenNLP {@link TokenSampleStream}s.
@@ -38,7 +41,7 @@ public class TokenSampleStreamFactory ex
public static void registerFactory() {
StreamFactoryRegistry.registerFactory(TokenSample.class,
- StreamFactoryRegistry.DEFAULT_FORMAT, new TokenSampleStreamFactory(Parameters.class));
+ StreamFactoryRegistry.DEFAULT_FORMAT, new TokenSampleStreamFactory(Parameters.class));
}
protected <P> TokenSampleStreamFactory(Class<P> params) {
@@ -51,8 +54,13 @@ public class TokenSampleStreamFactory ex
CmdLineUtil.checkInputFile("Data", params.getData());
FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
- ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
- params.getEncoding());
+ ObjectStream<String> lineStream = null;
+ try {
+ lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
+ params.getEncoding());
+ } catch (IOException ex) {
+ throw new RuntimeException(ex);
+ }
return new TokenSampleStream(lineStream);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/WordTagSampleStreamFactory.java Tue Feb 4 17:10:11 2014
@@ -14,10 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package opennlp.tools.formats;
import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.CmdLineUtil;
@@ -25,6 +27,7 @@ import opennlp.tools.cmdline.StreamFacto
import opennlp.tools.cmdline.params.BasicFormatParams;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.WordTagSampleStream;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -38,9 +41,9 @@ public class WordTagSampleStreamFactory
public static void registerFactory() {
StreamFactoryRegistry.registerFactory(POSSample.class,
- StreamFactoryRegistry.DEFAULT_FORMAT, new WordTagSampleStreamFactory(Parameters.class));
+ StreamFactoryRegistry.DEFAULT_FORMAT, new WordTagSampleStreamFactory(Parameters.class));
}
-
+
protected <P> WordTagSampleStreamFactory(Class<P> params) {
super(params);
}
@@ -51,8 +54,13 @@ public class WordTagSampleStreamFactory
CmdLineUtil.checkInputFile("Data", params.getData());
FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
- ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
- params.getEncoding());
+ ObjectStream<String> lineStream = null;
+ try {
+ lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
+ params.getEncoding());
+ } catch (IOException ex) {
+ throw new RuntimeException(ex);
+ }
return new WordTagSampleStream(lineStream);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStream.java Tue Feb 4 17:10:11 2014
@@ -29,6 +29,7 @@ import opennlp.tools.formats.ad.ADSenten
import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Node;
import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.TreeElement;
import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.StringUtil;
@@ -91,8 +92,8 @@ public class ADChunkSampleStream impleme
try {
this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(
- in, charsetName));
- } catch (UnsupportedEncodingException e) {
+ new MockInputStreamFactory(in), charsetName));
+ } catch (IOException e) {
// UTF-8 is available on all JVMs, will never happen
throw new IllegalStateException(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADChunkSampleStreamFactory.java Tue Feb 4 17:10:11 2014
@@ -19,7 +19,10 @@ package opennlp.tools.formats.ad;
import java.io.File;
import java.io.FileInputStream;
+import java.io.IOException;
import java.nio.charset.Charset;
+import java.util.logging.Level;
+import java.util.logging.Logger;
import opennlp.tools.chunker.ChunkSample;
import opennlp.tools.cmdline.ArgumentParser;
@@ -28,6 +31,7 @@ import opennlp.tools.cmdline.ArgumentPar
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.formats.LanguageSampleStreamFactory;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -78,8 +82,13 @@ public class ADChunkSampleStreamFactory
FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
- ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
- params.getEncoding());
+ ObjectStream<String> lineStream=null;
+ try {
+ lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
+params.getEncoding());
+ } catch (IOException ex) {
+ Logger.getLogger(ADChunkSampleStreamFactory.class.getName()).log(Level.SEVERE, null, ex);
+ }
ADChunkSampleStream sampleStream = new ADChunkSampleStream(lineStream);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStream.java Tue Feb 4 17:10:11 2014
@@ -35,6 +35,7 @@ import opennlp.tools.formats.ad.ADSenten
import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Node;
import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.TreeElement;
import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
@@ -191,9 +192,9 @@ public class ADNameSampleStream implemen
try {
this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(
- in, charsetName));
+ new MockInputStreamFactory(in), charsetName));
this.splitHyphenatedTokens = splitHyphenatedTokens;
- } catch (UnsupportedEncodingException e) {
+ } catch (IOException e) {
// UTF-8 is available on all JVMs, will never happen
throw new IllegalStateException(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADNameSampleStreamFactory.java Tue Feb 4 17:10:11 2014
@@ -19,7 +19,10 @@ package opennlp.tools.formats.ad;
import java.io.File;
import java.io.FileInputStream;
+import java.io.IOException;
import java.nio.charset.Charset;
+import java.util.logging.Level;
+import java.util.logging.Logger;
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
@@ -28,6 +31,7 @@ import opennlp.tools.cmdline.CmdLineUtil
import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.formats.LanguageSampleStreamFactory;
import opennlp.tools.namefind.NameSample;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -74,8 +78,12 @@ public class ADNameSampleStreamFactory e
FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
- ObjectStream<String> lineStream = new PlainTextByLineStream(
- sampleDataIn.getChannel(), params.getEncoding());
+ ObjectStream<String> lineStream=null;
+ try {
+ lineStream = new PlainTextByLineStream(
+new MockInputStreamFactory(sampleDataIn), params.getEncoding());
+ } catch (IOException ex) {
+throw new RuntimeException(ex) ; }
return new ADNameSampleStream(lineStream, params.getSplitHyphenatedTokens());
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStream.java Tue Feb 4 17:10:11 2014
@@ -29,6 +29,7 @@ import opennlp.tools.formats.ad.ADSenten
import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.Node;
import opennlp.tools.formats.ad.ADSentenceStream.SentenceParser.TreeElement;
import opennlp.tools.postag.POSSample;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -81,10 +82,10 @@ public class ADPOSSampleStream implement
try {
this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(
- in, charsetName));
+ new MockInputStreamFactory(in), charsetName));
this.expandME = expandME;
this.isIncludeFeatures = includeFeatures;
- } catch (UnsupportedEncodingException e) {
+ } catch (IOException e) {
// UTF-8 is available on all JVMs, will never happen
throw new IllegalStateException(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADPOSSampleStreamFactory.java Tue Feb 4 17:10:11 2014
@@ -19,7 +19,10 @@ package opennlp.tools.formats.ad;
import java.io.File;
import java.io.FileInputStream;
+import java.io.IOException;
import java.nio.charset.Charset;
+import java.util.logging.Level;
+import java.util.logging.Logger;
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
@@ -28,6 +31,7 @@ import opennlp.tools.cmdline.CmdLineUtil
import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.formats.LanguageSampleStreamFactory;
import opennlp.tools.postag.POSSample;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -73,8 +77,13 @@ public class ADPOSSampleStreamFactory ex
FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
- ObjectStream<String> lineStream = new PlainTextByLineStream(
- sampleDataIn.getChannel(), params.getEncoding());
+ ObjectStream<String> lineStream=null;
+ try {
+ lineStream = new PlainTextByLineStream(
+new MockInputStreamFactory(sampleDataIn), params.getEncoding());
+ } catch (IOException ex) {
+ Logger.getLogger(ADPOSSampleStreamFactory.class.getName()).log(Level.SEVERE, null, ex);
+ }
ADPOSSampleStream sentenceStream = new ADPOSSampleStream(lineStream,
params.getExpandME(), params.getIncludeFeatures());
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStream.java Tue Feb 4 17:10:11 2014
@@ -29,6 +29,7 @@ import java.util.regex.Pattern;
import opennlp.tools.formats.ad.ADSentenceStream.Sentence;
import opennlp.tools.sentdetect.SentenceSample;
import opennlp.tools.sentdetect.lang.Factory;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
@@ -81,8 +82,8 @@ public class ADSentenceSampleStream impl
boolean includeHeadlines) {
try {
this.adSentenceStream = new ADSentenceStream(new PlainTextByLineStream(
- in, charsetName));
- } catch (UnsupportedEncodingException e) {
+ new MockInputStreamFactory(in), charsetName));
+ } catch (IOException e) {
// UTF-8 is available on all JVMs, will never happen
throw new IllegalStateException(e);
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/ad/ADSentenceSampleStreamFactory.java Tue Feb 4 17:10:11 2014
@@ -19,7 +19,10 @@ package opennlp.tools.formats.ad;
import java.io.File;
import java.io.FileInputStream;
+import java.io.IOException;
import java.nio.charset.Charset;
+import java.util.logging.Level;
+import java.util.logging.Logger;
import opennlp.tools.cmdline.ArgumentParser;
import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
@@ -28,6 +31,7 @@ import opennlp.tools.cmdline.CmdLineUtil
import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.formats.LanguageSampleStreamFactory;
import opennlp.tools.sentdetect.SentenceSample;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -71,8 +75,13 @@ public class ADSentenceSampleStreamFacto
FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
- ObjectStream<String> lineStream = new PlainTextByLineStream(
- sampleDataIn.getChannel(), params.getEncoding());
+ ObjectStream<String> lineStream=null;
+ try {
+ lineStream = new PlainTextByLineStream(
+new MockInputStreamFactory(sampleDataIn), params.getEncoding());
+ } catch (IOException ex) {
+ Logger.getLogger(ADSentenceSampleStreamFactory.class.getName()).log(Level.SEVERE, null, ex);
+ }
ADSentenceSampleStream sentenceStream = new ADSentenceSampleStream(
lineStream, includeTitle);
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderEventStream.java Tue Feb 4 17:10:11 2014
@@ -24,6 +24,7 @@ import java.util.Map;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.EventStream;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
@@ -151,7 +152,7 @@ public class NameFinderEventStream exten
System.exit(1);
}
EventStream es = new NameFinderEventStream(new NameSampleDataStream(
- new PlainTextByLineStream(new java.io.InputStreamReader(System.in))));
+ new PlainTextByLineStream(new MockInputStreamFactory(System.in),"UTF-8")));
while (es.hasNext()) {
System.out.println(es.next());
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinder.java Tue Feb 4 17:10:11 2014
@@ -125,8 +125,9 @@ public final class RegexNameFinder imple
}
/**
- * NEW. This method removes the need for tokenization, but returns the Span
- * with character indices, rather than word.
+ * NEW. This method removes the need for tokenization, but returns the
+ * character spans rather than word spans. Span.spansToStrings will not work
+ * properly on this output.
*
* @param text
* @return
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/RegexNameFinderFactory.java Tue Feb 4 17:10:11 2014
@@ -23,7 +23,7 @@ import opennlp.tools.util.Span;
/**
*
* Returns RegexNameFinders based on multiple methods: 1. A selection of
- * defaults 2. A configuration and a selection of defaults 3.
+ * defaults 2. A configuration and a selection of defaults
*/
public class RegexNameFinderFactory {
@@ -41,7 +41,10 @@ public class RegexNameFinderFactory {
if (config == null) {
throw new IllegalArgumentException("config Map cannot be null");
}
- Map<String, Pattern[]> defaultsToMap = defaultsToMap(defaults);
+ Map<String, Pattern[]> defaultsToMap = new HashMap<>();
+ if (defaults != null) {
+ defaultsToMap = defaultsToMap(defaults);
+ }
defaultsToMap.putAll(config);
return new RegexNameFinder(defaultsToMap);
}
@@ -109,8 +112,8 @@ public class RegexNameFinderFactory {
@Override
public Map<String, Pattern[]> getRegexMap() {
Pattern[] p = new Pattern[1];
- // p[0] = Pattern.compile("([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})|([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})|([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})", Pattern.CASE_INSENSITIVE);
- p[0]=Pattern.compile("((\\(\\d{3}\\) ?)|(\\d{3}-))?\\d{3}-\\d{4}");
+ // p[0] = Pattern.compile("([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})|([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})|([\\+(]?(\\d){2,}[)]?[- \\.]?(\\d){2,}[- \\.]?(\\d){2,})", Pattern.CASE_INSENSITIVE);
+ p[0] = Pattern.compile("((\\(\\d{3}\\) ?)|(\\d{3}-))?\\d{3}-\\d{4}");
Map<String, Pattern[]> regexMap = new HashMap<>();
regexMap.put(getType(), p);
return regexMap;
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/chunking/ParserEventStream.java Tue Feb 4 17:10:11 2014
@@ -29,6 +29,7 @@ import opennlp.tools.parser.Parse;
import opennlp.tools.parser.ParseSampleStream;
import opennlp.tools.parser.ParserEventTypeEnum;
import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
@@ -204,7 +205,7 @@ public class ParserEventStream extends A
if (fun) {
Parse.useFunctionTags(true);
}
- opennlp.tools.ml.model.EventStream es = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.InputStreamReader(System.in))), rules, etype, dict);
+ opennlp.tools.ml.model.EventStream es = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new MockInputStreamFactory(System.in),"UTF-8")), rules, etype, dict);
while (es.hasNext()) {
System.out.println(es.next());
}
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java?rev=1564379&r1=1564378&r2=1564379&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/treeinsert/ParserEventStream.java Tue Feb 4 17:10:11 2014
@@ -36,6 +36,7 @@ import opennlp.tools.parser.Parse;
import opennlp.tools.parser.ParseSampleStream;
import opennlp.tools.parser.ParserEventTypeEnum;
import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.MockInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
@@ -379,7 +380,7 @@ public class ParserEventStream extends A
if (fun) {
Parse.useFunctionTags(true);
}
- opennlp.tools.ml.model.EventStream es = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new java.io.InputStreamReader(System.in))), rules, etype, dict);
+ opennlp.tools.ml.model.EventStream es = new ParserEventStream(new ParseSampleStream(new PlainTextByLineStream(new MockInputStreamFactory(System.in),"UTF-8")), rules, etype, dict);
while (es.hasNext()) {
Event e = es.next();
if (model != null) {
Re: svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/
main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/
main/java/opennlp/tools/cmdlin...
Posted by Jörn Kottmann <ko...@gmail.com>.
Please don't change files which don't have to be changed for a certain
issue.
Changes like adding @Override annotations should be done in a separate
Jira issue.
Jörn
On 02/04/2014 06:10 PM, markg@apache.org wrote:
> Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
> URL:http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
> ==============================================================================
> --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java (original)
> +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java Tue Feb 4 17:10:11 2014
> @@ -40,10 +40,12 @@ public class DoccatTrainerTool
> super(DocumentSample.class, TrainerToolParams.class);
> }
>
> + @Override
> public String getShortDescription() {
> return "trainer for the learnable document categorizer";
> }
>
> + @Override
> public void run(String format, String[] args) {
> super.run(format, args);
>
Re: svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/
main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/
main/java/opennlp/tools/cmdlin...
Posted by Jörn Kottmann <ko...@gmail.com>.
On 02/06/2014 01:28 PM, Jörn Kottmann wrote:
> On 02/04/2014 06:10 PM, markg@apache.org wrote:
>> Modified:
>> opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
>> URL:http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
>>
>> ==============================================================================
>>
>> ---
>> opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
>> (original)
>> +++
>> opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
>> Tue Feb 4 17:10:11 2014
>> @@ -14,7 +14,6 @@
>> * See the License for the specific language governing permissions and
>> * limitations under the License.
>> */
>
> <SNIP>
>> - ObjectStream<String> lineStream =
>> - new PlainTextByLineStream(new InputStreamReader(System.in));
>> -
>> - PerformanceMonitor perfMon = new
>> PerformanceMonitor(System.err, "sent");
>> - perfMon.start();
>> + ObjectStream<String> lineStream = null;
>> + PerformanceMonitor perfMon = null;
>> try {
>> + lineStream = new PlainTextByLineStream(new
>> MockInputStreamFactory(System.in), "UTF-8");
>> + perfMon = new PerformanceMonitor(System.err, "sent");
>> + perfMon.start();
>
>
> What is the motivation to move the start of the performance monitoring
> down? Before it measured opening
> the input stream as well, now it won't.
>
> If you think it should not include opening the stream I suggest we do
> this change in a separate jira issue and handle it
> identical in all places.
Oops, my mistake, I was looking at the wrong file. Here it was moved down
to maintain the order.
But in the TokenNameFinderTool it was changed.
Jörn
Re: svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/
main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/
main/java/opennlp/tools/cmdlin...
Posted by Jörn Kottmann <ko...@gmail.com>.
On 02/04/2014 06:10 PM, markg@apache.org wrote:
> Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java
> URL:http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java?rev=1564379&r1=1564378&r2=1564379&view=diff
> ==============================================================================
> --- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java (original)
> +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/postag/POSTaggerTool.java Tue Feb 4 17:10:11 2014
> @@ -14,7 +14,6 @@
> * See the License for the specific language governing permissions and
> * limitations under the License.
> */
<SNIP>
>
> - ObjectStream<String> lineStream =
> - new PlainTextByLineStream(new InputStreamReader(System.in));
> -
> - PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
> - perfMon.start();
> + ObjectStream<String> lineStream = null;
> + PerformanceMonitor perfMon = null;
>
> try {
> + lineStream = new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
> + perfMon = new PerformanceMonitor(System.err, "sent");
> + perfMon.start();
What is the motivation to move the start of the performance monitoring
down? Before it measured opening
the input stream as well, now it won't.
If you think it should not include opening the stream, I suggest we make
this change in a separate Jira issue and handle it
identically in all places.
Jörn
Re: svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/
main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/
main/java/opennlp/tools/cmdlin...
Posted by Jörn Kottmann <ko...@gmail.com>.
On 02/04/2014 06:10 PM, markg@apache.org wrote:
> @@ -51,8 +55,13 @@ public class SentenceSampleStreamFactory
> CmdLineUtil.checkInputFile("Data", params.getData());
> FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
>
> - ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
> - params.getEncoding());
> + ObjectStream<String> lineStream=null;
> + try {
> + lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
> +params.getEncoding());
> + } catch (IOException ex) {
> + Logger.getLogger(SentenceSampleStreamFactory.class.getName()).log(Level.SEVERE, null, ex);
> + }
>
> return new SentenceSampleStream(lineStream);
> }
>
<SNIP>
>
> /**
> * Factory producing OpenNLP {@link TokenSampleStream}s.
> @@ -38,7 +41,7 @@ public class TokenSampleStreamFactory ex
>
> public static void registerFactory() {
> StreamFactoryRegistry.registerFactory(TokenSample.class,
> - StreamFactoryRegistry.DEFAULT_FORMAT, new TokenSampleStreamFactory(Parameters.class));
> + StreamFactoryRegistry.DEFAULT_FORMAT, new TokenSampleStreamFactory(Parameters.class));
> }
>
> protected <P> TokenSampleStreamFactory(Class<P> params) {
> @@ -51,8 +54,13 @@ public class TokenSampleStreamFactory ex
> CmdLineUtil.checkInputFile("Data", params.getData());
> FileInputStream sampleDataIn = CmdLineUtil.openInFile(params.getData());
>
> - ObjectStream<String> lineStream = new PlainTextByLineStream(sampleDataIn.getChannel(),
> - params.getEncoding());
> + ObjectStream<String> lineStream = null;
> + try {
> + lineStream = new PlainTextByLineStream(new MockInputStreamFactory(sampleDataIn),
> + params.getEncoding());
> + } catch (IOException ex) {
> + throw new RuntimeException(ex);
> + }
We need to be careful here with the error handling.
It is always good to see how things worked before. In the previous
version a TerminateToolException was thrown in case
the stream couldn't be opened. I suggest that we keep that way of
handling it. Anyway, whatever we decide, we should do
it consistently across the code base.
Jörn
Re: svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/
main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/
main/java/opennlp/tools/cmdlin...
Posted by Jörn Kottmann <ko...@gmail.com>.
On 02/04/2014 06:10 PM, markg@apache.org wrote:
> public BioNLP2004NameSampleStream(InputStream in, int types) {
> try {
> - this.lineStream = new PlainTextByLineStream(in, "UTF-8");
> + this.lineStream = new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8");
> System.setOut(new PrintStream(System.out, true, "UTF-8"));
> - } catch (UnsupportedEncodingException e) {
> + } catch (IOException e) {
> // UTF-8 is available on all JVMs, will never happen
> throw new IllegalStateException(e);
> }
All these streams need to be changed as well: the constructor taking the
InputStream "in" has to be deprecated and a new
constructor taking the InputStreamFactory needs to be added.
I suggest that we revert all these changes, and instead just add a new
constructor.
Jörn
Re: svn commit: r1564379 [1/2] - in /opennlp/trunk/opennlp-tools/src:
main/java/opennlp/tools/cmdline/ main/java/opennlp/tools/cmdline/chunker/
main/java/opennlp/tools/cmdline/doccat/ main/java/opennlp/tools/cmdline/namefind/
main/java/opennlp/tools/cmdlin...
Posted by Jörn Kottmann <ko...@gmail.com>.
On 02/04/2014 06:10 PM, markg@apache.org wrote:
> +++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/tokenizer/CommandLineTokenizer.java Tue Feb 4 17:10:11 2014
<SNIP>
> void process() {
> -
> - ObjectStream<String> untokenizedLineStream =
> - new PlainTextByLineStream(new InputStreamReader(System.in));
> -
> - ObjectStream<String> tokenizedLineStream = new WhitespaceTokenStream(
> - new TokenizerStream(tokenizer, untokenizedLineStream));
> -
> - PerformanceMonitor perfMon = new PerformanceMonitor(System.err, "sent");
> - perfMon.start();
> -
> + ObjectStream<String> untokenizedLineStream = null;
> +
> + ObjectStream<String> tokenizedLineStream = null;
> + PerformanceMonitor perfMon = null;
> try {
> + untokenizedLineStream =
> + new PlainTextByLineStream(new MockInputStreamFactory(System.in), "UTF-8");
The encoding should not be changed. To read from System.in the default
encoding should be used, and not UTF-8.
As far as I know that will not work on Windows.
Jörn