You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/05/04 16:13:18 UTC

svn commit: r1333973 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: cmdline/ formats/convert/

Author: joern
Date: Fri May  4 14:13:17 2012
New Revision: 1333973

URL: http://svn.apache.org/viewvc?rev=1333973&view=rev
Log:
OPENNLP-342 New Parse Sample converters

Added:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java   (with props)
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java   (with props)
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToSentenceSampleStreamFactory.java   (with props)
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToTokenSampleStreamFactory.java   (with props)
Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java?rev=1333973&r1=1333972&r2=1333973&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/StreamFactoryRegistry.java Fri May  4 14:13:17 2012
@@ -44,6 +44,9 @@ import opennlp.tools.formats.convert.Nam
 import opennlp.tools.formats.convert.NameToTokenSampleStreamFactory;
 import opennlp.tools.formats.convert.POSToSentenceSampleStreamFactory;
 import opennlp.tools.formats.convert.POSToTokenSampleStreamFactory;
+import opennlp.tools.formats.convert.ParseToPOSSampleStreamFactory;
+import opennlp.tools.formats.convert.ParseToSentenceSampleStreamFactory;
+import opennlp.tools.formats.convert.ParseToTokenSampleStreamFactory;
 import opennlp.tools.formats.frenchtreebank.ConstitParseSampleStreamFactory;
 import opennlp.tools.formats.muc.Muc6FullParseCorefSampleStreamFactory;
 import opennlp.tools.formats.muc.Muc6NameSampleStreamFactory;
@@ -68,9 +71,14 @@ public final class StreamFactoryRegistry
     
     NameToSentenceSampleStreamFactory.registerFactory();
     NameToTokenSampleStreamFactory.registerFactory();
+    
     POSToSentenceSampleStreamFactory.registerFactory();
     POSToTokenSampleStreamFactory.registerFactory();
 
+    ParseToPOSSampleStreamFactory.registerFactory();
+    ParseToSentenceSampleStreamFactory.registerFactory();
+    ParseToTokenSampleStreamFactory.registerFactory();
+    
     BioNLP2004NameSampleStreamFactory.registerFactory();
     Conll02NameSampleStreamFactory.registerFactory();
     Conll03NameSampleStreamFactory.registerFactory();

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java?rev=1333973&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java Fri May  4 14:13:17 2012
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.convert;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.parser.Parse;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.util.FilterObjectStream;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Filter stream which turns each {@link Parse} read from the underlying
+ * stream into a {@link POSSample}.
+ * <p>
+ * <b>Note:</b> Do not use this class, internal use only!
+ */
+public class ParseToPOSSampleStream extends FilterObjectStream<Parse, POSSample> {
+
+  /**
+   * @param samples the stream of parses which should be converted
+   */
+  protected ParseToPOSSampleStream(ObjectStream<Parse> samples) {
+    super(samples);
+  }
+
+  /**
+   * Reads the next parse and converts it into a POS sample. For every tag
+   * node of the parse the string form of the node is used as token and the
+   * type of the node is used as tag.
+   *
+   * @return the converted sample, or null if the underlying stream returned null
+   *
+   * @throws IOException declared for the read on the underlying stream
+   */
+  public POSSample read() throws IOException {
+
+    Parse nextParse = samples.read();
+
+    // Nothing was read from the underlying stream, pass the null through
+    if (nextParse == null) {
+      return null;
+    }
+
+    List<String> tokens = new ArrayList<String>();
+    List<String> posTags = new ArrayList<String>();
+
+    for (Parse node : nextParse.getTagNodes()) {
+      tokens.add(node.toString());
+      posTags.add(node.getType());
+    }
+
+    return new POSSample(tokens, posTags);
+  }
+}

Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStream.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java?rev=1333973&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java Fri May  4 14:13:17 2012
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.convert;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.formats.LanguageSampleStreamFactory;
+import opennlp.tools.formats.ParseSampleStreamFactory;
+import opennlp.tools.formats.convert.POSToSentenceSampleStreamFactory.Parameters;
+import opennlp.tools.parser.Parse;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.util.ObjectStream;
+
+
+/**
+ * Factory which creates a {@link POSSample} stream by reading {@link Parse}s
+ * with the default format parse factory and converting them with a
+ * {@link ParseToPOSSampleStream}.
+ * <p>
+ * <b>Note:</b> Do not use this class, internal use only!
+ */
+public class ParseToPOSSampleStreamFactory extends LanguageSampleStreamFactory<POSSample> {
+
+  private ParseToPOSSampleStreamFactory() {
+    super(ParseSampleStreamFactory.Parameters.class);
+  }
+
+  /**
+   * Creates the converting stream.
+   *
+   * @param args the command line arguments of the parse sample format
+   *
+   * @return a stream of POS samples backed by the default format parse stream
+   */
+  public ObjectStream<POSSample> create(String[] args) {
+
+    // The arguments must be parsed with the parameter interface which is
+    // declared in the constructor. The unqualified name Parameters resolves
+    // to the imported POSToSentenceSampleStreamFactory.Parameters in this
+    // file, which is a different interface, therefore the name is qualified.
+    ParseSampleStreamFactory.Parameters params =
+        ArgumentParser.parse(args, ParseSampleStreamFactory.Parameters.class);
+    language = params.getLang();
+
+    ObjectStream<Parse> parseSampleStream = StreamFactoryRegistry.getFactory(Parse.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT).create(
+        ArgumentParser.filter(args, ParseSampleStreamFactory.Parameters.class));
+
+    return new ParseToPOSSampleStream(parseSampleStream);
+  }
+
+  /**
+   * Registers this factory for {@link POSSample}s under the format name "parse".
+   */
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(POSSample.class,
+        "parse", new ParseToPOSSampleStreamFactory());
+  }
+}

Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToPOSSampleStreamFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToSentenceSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToSentenceSampleStreamFactory.java?rev=1333973&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToSentenceSampleStreamFactory.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToSentenceSampleStreamFactory.java Fri May  4 14:13:17 2012
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.convert;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.DetokenizerParameter;
+import opennlp.tools.formats.DetokenizerSampleStreamFactory;
+import opennlp.tools.formats.ParseSampleStreamFactory;
+import opennlp.tools.parser.Parse;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.sentdetect.SentenceSample;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Factory which creates a {@link SentenceSample} stream by reading
+ * {@link Parse}s with the default format parse factory, converting them with
+ * a {@link ParseToPOSSampleStream} and passing the result on to a
+ * {@link POSToSentenceSampleStream}.
+ * <p>
+ * <b>Note:</b> Do not use this class, internal use only!
+ */
+public class ParseToSentenceSampleStreamFactory extends DetokenizerSampleStreamFactory<SentenceSample> {
+
+  /**
+   * The parse sample parameters combined with the detokenizer parameter.
+   */
+  interface Parameters extends ParseSampleStreamFactory.Parameters, DetokenizerParameter {
+  }
+
+  private ParseToSentenceSampleStreamFactory() {
+    super(Parameters.class);
+  }
+
+  /**
+   * Creates the converting stream.
+   *
+   * @param args the command line arguments, parse sample and detokenizer arguments
+   *
+   * @return a stream of sentence samples backed by the default format parse stream
+   */
+  public ObjectStream<SentenceSample> create(String[] args) {
+    Parameters params = ArgumentParser.parse(args, Parameters.class);
+    language = params.getLang();
+
+    // Only the arguments of the parse sample format are forwarded to the
+    // parse factory, the detokenizer argument is consumed by this factory.
+    // This matches the filtering done in ParseToPOSSampleStreamFactory.
+    ObjectStream<Parse> parseSampleStream = StreamFactoryRegistry.getFactory(Parse.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT).create(
+        ArgumentParser.filter(args, ParseSampleStreamFactory.Parameters.class));
+
+    // NOTE(review): 30 is presumably the number of sentences which are
+    // grouped into one sentence sample - confirm against POSToSentenceSampleStream.
+    return new POSToSentenceSampleStream(createDetokenizer(params),
+        new ParseToPOSSampleStream(parseSampleStream), 30);
+  }
+
+  /**
+   * Registers this factory for {@link SentenceSample}s under the format name "parse".
+   */
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(SentenceSample.class,
+        "parse", new ParseToSentenceSampleStreamFactory());
+  }
+}

Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToSentenceSampleStreamFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToTokenSampleStreamFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToTokenSampleStreamFactory.java?rev=1333973&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToTokenSampleStreamFactory.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToTokenSampleStreamFactory.java Fri May  4 14:13:17 2012
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.convert;
+
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.StreamFactoryRegistry;
+import opennlp.tools.cmdline.params.DetokenizerParameter;
+import opennlp.tools.formats.DetokenizerSampleStreamFactory;
+import opennlp.tools.formats.ParseSampleStreamFactory;
+import opennlp.tools.formats.WordTagSampleStreamFactory;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.tokenize.TokenSample;
+import opennlp.tools.util.ObjectStream;
+
+/**
+ * Factory which creates a {@link TokenSample} stream by reading parses with
+ * the default format parse factory, converting them with a
+ * {@link ParseToPOSSampleStream} and passing the result on to a
+ * {@link POSToTokenSampleStream}.
+ * <p>
+ * <b>Note:</b> Do not use this class, internal use only!
+ */
+public class ParseToTokenSampleStreamFactory extends DetokenizerSampleStreamFactory<TokenSample> {
+
+  /**
+   * The parse sample parameters combined with the detokenizer parameter.
+   */
+  interface Parameters extends ParseSampleStreamFactory.Parameters, DetokenizerParameter {
+  }
+
+  private ParseToTokenSampleStreamFactory() {
+    super(Parameters.class);
+  }
+
+  /**
+   * Creates the converting stream.
+   *
+   * @param args the command line arguments, parse sample and detokenizer arguments
+   *
+   * @return a stream of token samples backed by the default format parse stream
+   */
+  public ObjectStream<TokenSample> create(String[] args) {
+    Parameters params = ArgumentParser.parse(args, Parameters.class);
+    language = params.getLang();
+
+    // The input of this factory is parse data, therefore the default format
+    // parse factory must be used as source and only the parse sample
+    // arguments are forwarded to it. The type is written fully qualified
+    // because Parse is not imported in this file.
+    ObjectStream<opennlp.tools.parser.Parse> parseSampleStream =
+        StreamFactoryRegistry.getFactory(opennlp.tools.parser.Parse.class,
+        StreamFactoryRegistry.DEFAULT_FORMAT).create(
+        ArgumentParser.filter(args, ParseSampleStreamFactory.Parameters.class));
+
+    ObjectStream<POSSample> posSampleStream = new ParseToPOSSampleStream(parseSampleStream);
+
+    return new POSToTokenSampleStream(createDetokenizer(params), posSampleStream);
+  }
+
+  /**
+   * Registers this factory for {@link TokenSample}s under the format name "parse".
+   */
+  public static void registerFactory() {
+    StreamFactoryRegistry.registerFactory(TokenSample.class,
+        "parse", new ParseToTokenSampleStreamFactory());
+  }
+}

Propchange: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/formats/convert/ParseToTokenSampleStreamFactory.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain