You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/05/04 16:13:18 UTC
svn commit: r1333973 - in
/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: cmdline/
formats/convert/
Author: joern
Date: Fri May 4 14:13:17 2012
New Revision: 1333973
URL: http://svn.apache.org/viewvc?rev=1333973&view=rev
Log:
OPENNLP-342 New Parse Sample converters
Added:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java (with props)
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java (with props)
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToSentenceSampleStreamFactory.java (with props)
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToTokenSampleStreamFactory.java (with props)
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java?rev=1333973&r1=1333972&r2=1333973&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java Fri May 4 14:13:17 2012
@@ -44,6 +44,9 @@ import opennlp.tools.formats.convert.Nam
import opennlp.tools.formats.convert.NameToTokenSampleStreamFactory;
import opennlp.tools.formats.convert.POSToSentenceSampleStreamFactory;
import opennlp.tools.formats.convert.POSToTokenSampleStreamFactory;
+import opennlp.tools.formats.convert.ParseToPOSSampleStreamFactory;
+import opennlp.tools.formats.convert.ParseToSentenceSampleStreamFactory;
+import opennlp.tools.formats.convert.ParseToTokenSampleStreamFactory;
import opennlp.tools.formats.frenchtreebank.ConstitParseSampleStreamFactory;
import opennlp.tools.formats.muc.Muc6FullParseCorefSampleStreamFactory;
import opennlp.tools.formats.muc.Muc6NameSampleStreamFactory;
@@ -68,9 +71,14 @@ public final class StreamFactoryRegistry
NameToSentenceSampleStreamFactory.registerFactory();
NameToTokenSampleStreamFactory.registerFactory();
+
POSToSentenceSampleStreamFactory.registerFactory();
POSToTokenSampleStreamFactory.registerFactory();
+ ParseToPOSSampleStreamFactory.registerFactory();
+ ParseToSentenceSampleStreamFactory.registerFactory();
+ ParseToTokenSampleStreamFactory.registerFactory();
+
BioNLP2004NameSampleStreamFactory.registerFactory();
Conll02NameSampleStreamFactory.registerFactory();
Conll03NameSampleStreamFactory.registerFactory();
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java?rev=1333973&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java Fri May 4 14:13:17 2012
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.convert;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.parser.Parse;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Adapts a stream of {@link Parse} objects into a stream of {@link POSSample}s
+ * by collecting the string form and the type of every tag node of a parse.
+ * <p>
+ * <b>Note:</b> Do not use this class, internal use only!
+ */
+public class ParseToPOSSampleStream extends FilterObjectStream<Parse, POSSample> {
+
+  /**
+   * Wraps the given stream of parses.
+   *
+   * @param samples the parses that are converted on each call to {@link #read()}
+   */
+  protected ParseToPOSSampleStream(ObjectStream<Parse> samples) {
+    super(samples);
+  }
+
+  /**
+   * Reads the next parse from the wrapped stream and turns it into a POS sample.
+   *
+   * @return a sample holding, for each tag node of the parse, its
+   *         {@code toString()} value as the token and its type as the tag;
+   *         {@code null} when the wrapped stream returned {@code null}
+   * @throws IOException if reading from the wrapped stream fails
+   */
+  public POSSample read() throws IOException {
+
+    Parse nextParse = samples.read();
+
+    // A null from the wrapped stream is handed straight back to the caller.
+    if (nextParse == null) {
+      return null;
+    }
+
+    List<String> tokens = new ArrayList<String>();
+    List<String> posTags = new ArrayList<String>();
+
+    // Tokens and tags are appended in lock-step so both lists stay aligned.
+    for (Parse node : nextParse.getTagNodes()) {
+      tokens.add(node.toString());
+      posTags.add(node.getType());
+    }
+
+    return new POSSample(tokens, posTags);
+  }
+}
Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java?rev=1333973&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java Fri May 4 14:13:17 2012
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.convert;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.formats.LanguageSampleStreamFactory;
+import opennlp.tools.formats.ParseSampleStreamFactory;
+import opennlp.tools.formats.convert.POSToSentenceSampleStreamFactory.Parameters;
+import opennlp.tools.parser.Parse;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.util.ObjectStream;
+
+
+/**
+ * Factory that produces {@link POSSample}s from data in the default
+ * {@link Parse} format; registered under the format name "parse".
+ * <p>
+ * <b>Note:</b> Do not use this class, internal use only!
+ */
+public class ParseToPOSSampleStreamFactory extends LanguageSampleStreamFactory<POSSample> {
+
+  /**
+   * Declares {@link ParseSampleStreamFactory.Parameters} as the accepted
+   * argument set. Instances are only created by {@link #registerFactory()}.
+   */
+  private ParseToPOSSampleStreamFactory() {
+    super(ParseSampleStreamFactory.Parameters.class);
+  }
+
+  /**
+   * Creates a POS sample stream that converts parses read via the default
+   * {@link Parse} stream factory.
+   *
+   * @param args the command line arguments for this format
+   * @return a stream converting every parse into a {@link POSSample}
+   */
+  public ObjectStream<POSSample> create(String[] args) {
+
+    // Parse with the same parameter interface that the constructor declares.
+    // The unqualified name "Parameters" resolves to the imported
+    // POSToSentenceSampleStreamFactory.Parameters, which is not the argument
+    // set of this factory, so the type is spelled out explicitly here.
+    ParseSampleStreamFactory.Parameters params =
+        ArgumentParser.parse(args, ParseSampleStreamFactory.Parameters.class);
+    language = params.getLang();
+
+    ObjectStream<Parse> parseSampleStream = StreamFactoryRegistry.getFactory(Parse.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT).create(
+        ArgumentParser.filter(args, ParseSampleStreamFactory.Parameters.class));
+
+    return new ParseToPOSSampleStream(parseSampleStream);
+  }
+
+  /**
+   * Registers this factory for {@link POSSample} under the format name "parse".
+   */
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(POSSample.class,
+        "parse", new ParseToPOSSampleStreamFactory());
+  }
+}
Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToSentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToSentenceSampleStreamFactory.java?rev=1333973&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToSentenceSampleStreamFactory.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToSentenceSampleStreamFactory.java Fri May 4 14:13:17 2012
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.convert;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.DetokenizerParameter;
+import opennlp.tools.formats.DetokenizerSampleStreamFactory;
+import opennlp.tools.formats.ParseSampleStreamFactory;
+import opennlp.tools.parser.Parse;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.sentdetect.SentenceSample;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Factory that produces {@link SentenceSample}s from data in the default
+ * {@link Parse} format; registered under the format name "parse".
+ * <p>
+ * <b>Note:</b> Do not use this class, internal use only!
+ */
+public class ParseToSentenceSampleStreamFactory extends DetokenizerSampleStreamFactory<SentenceSample> {
+
+  /**
+   * Arguments accepted by this factory: those of the parse format plus the
+   * detokenizer option.
+   */
+  interface Parameters extends ParseSampleStreamFactory.Parameters, DetokenizerParameter {
+  }
+
+  /**
+   * Declares {@link Parameters} as the accepted argument set. Instances are
+   * only created by {@link #registerFactory()}.
+   */
+  private ParseToSentenceSampleStreamFactory() {
+    super(Parameters.class);
+  }
+
+  /**
+   * Creates a sentence sample stream by reading parses, converting them to
+   * POS samples and detokenizing those into sentence samples.
+   *
+   * @param args the command line arguments for this format
+   * @return a stream of {@link SentenceSample}s derived from the parse data
+   */
+  public ObjectStream<SentenceSample> create(String[] args) {
+    Parameters params = ArgumentParser.parse(args, Parameters.class);
+    language = params.getLang();
+
+    // Forward only the arguments the parse format itself declares; the
+    // detokenizer option belongs to this converter, not to the Parse factory.
+    ObjectStream<Parse> parseSampleStream = StreamFactoryRegistry.getFactory(Parse.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT).create(
+        ArgumentParser.filter(args, ParseSampleStreamFactory.Parameters.class));
+
+    // NOTE(review): 30 is presumably the number of sentences grouped into one
+    // sample by POSToSentenceSampleStream - confirm against that class.
+    return new POSToSentenceSampleStream(createDetokenizer(params),
+        new ParseToPOSSampleStream(parseSampleStream), 30);
+  }
+
+  /**
+   * Registers this factory for {@link SentenceSample} under the format name "parse".
+   */
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(SentenceSample.class,
+        "parse", new ParseToSentenceSampleStreamFactory());
+  }
+}
Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToSentenceSampleStreamFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToTokenSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToTokenSampleStreamFactory.java?rev=1333973&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToTokenSampleStreamFactory.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToTokenSampleStreamFactory.java Fri May 4 14:13:17 2012
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.convert;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.DetokenizerParameter;
+import opennlp.tools.formats.DetokenizerSampleStreamFactory;
+import opennlp.tools.formats.ParseSampleStreamFactory;
+import opennlp.tools.formats.WordTagSampleStreamFactory;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.tokenize.TokenSample;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Factory that produces {@link TokenSample}s from data in the default
+ * parse format; registered under the format name "parse".
+ * <p>
+ * <b>Note:</b> Do not use this class, internal use only!
+ */
+public class ParseToTokenSampleStreamFactory extends DetokenizerSampleStreamFactory<TokenSample> {
+
+  /**
+   * Arguments accepted by this factory: those of the parse format plus the
+   * detokenizer option.
+   */
+  interface Parameters extends ParseSampleStreamFactory.Parameters, DetokenizerParameter {
+  }
+
+  /**
+   * Declares {@link Parameters} as the accepted argument set. Instances are
+   * only created by {@link #registerFactory()}.
+   */
+  private ParseToTokenSampleStreamFactory() {
+    super(Parameters.class);
+  }
+
+  /**
+   * Creates a token sample stream by reading parses, converting them to
+   * POS samples and detokenizing those into token samples.
+   *
+   * @param args the command line arguments for this format
+   * @return a stream of {@link TokenSample}s derived from the parse data
+   */
+  public ObjectStream<TokenSample> create(String[] args) {
+    Parameters params = ArgumentParser.parse(args, Parameters.class);
+    language = params.getLang();
+
+    // The input of this converter is parse data, so the Parse factory must be
+    // used as the source (not the word-tag POSSample format), and only the
+    // parse format's own arguments are forwarded to it. The Parse type is
+    // fully qualified because this file does not import it.
+    ObjectStream<opennlp.tools.parser.Parse> parseSampleStream =
+        StreamFactoryRegistry.getFactory(opennlp.tools.parser.Parse.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT).create(
+        ArgumentParser.filter(args, ParseSampleStreamFactory.Parameters.class));
+
+    ObjectStream<POSSample> posSampleStream = new ParseToPOSSampleStream(parseSampleStream);
+
+    return new POSToTokenSampleStream(createDetokenizer(params), posSampleStream);
+  }
+
+  /**
+   * Registers this factory for {@link TokenSample} under the format name "parse".
+   */
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(TokenSample.class,
+        "parse", new ParseToTokenSampleStreamFactory());
+  }
+}
Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToTokenSampleStreamFactory.java
------------------------------------------------------------------------------
svn:mime-type = text/plain