You are viewing a plain text version of this content. The canonical link to the original message was not preserved in this plain text export.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/07/13 11:06:31 UTC

svn commit: r1361114 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: chunker/ doccat/ namefind/ parser/ postag/ sentdetect/ tokenize/ util/model/

Author: joern
Date: Fri Jul 13 09:06:30 2012
New Revision: 1361114

URL: http://svn.apache.org/viewvc?rev=1361114&view=rev
Log:
OPENNLP-505 Added constructors which load the model from a File or URL object.

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java Fri Jul 13 09:06:30 2012
@@ -18,11 +18,13 @@
 
 package opennlp.tools.chunker;
 
+import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URL;
 import java.util.Map;
 
 import opennlp.model.AbstractModel;
@@ -59,6 +61,14 @@ public class ChunkerModel extends BaseMo
     super(COMPONENT_NAME, in);
   }
 
+  public ChunkerModel(File modelFile) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelFile);
+  }
+  
+  public ChunkerModel(URL modelURL) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelURL);
+  }
+  
   @Override
   protected void validateArtifactMap() throws InvalidFormatException {
     super.validateArtifactMap();

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java Fri Jul 13 09:06:30 2012
@@ -17,8 +17,10 @@
 
 package opennlp.tools.doccat;
 
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URL;
 import java.util.Map;
 
 import opennlp.model.AbstractModel;
@@ -45,6 +47,14 @@ public class DoccatModel extends BaseMod
   public DoccatModel(InputStream in) throws IOException, InvalidFormatException {
     super(COMPONENT_NAME, in);
   }
+  
+  public DoccatModel(File modelFile) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelFile);
+  }
+  
+  public DoccatModel(URL modelURL) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelURL);
+  }
 
   @Override
   protected void validateArtifactMap() throws InvalidFormatException {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java Fri Jul 13 09:06:30 2012
@@ -19,9 +19,11 @@
 package opennlp.tools.namefind;
 
 import java.io.ByteArrayInputStream;
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -108,6 +110,15 @@ public class TokenNameFinderModel extend
     super(COMPONENT_NAME, in);
   }
   
+  public TokenNameFinderModel(File modelFile) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelFile);
+  }
+  
+  public TokenNameFinderModel(URL modelURL) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelURL);
+  }
+  
+  
   /**
    * Retrieves the {@link TokenNameFinder} model.
    *

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java Fri Jul 13 09:06:30 2012
@@ -19,11 +19,13 @@
 package opennlp.tools.parser;
 
 import java.io.BufferedReader;
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.io.OutputStreamWriter;
+import java.net.URL;
 import java.util.Map;
 
 import opennlp.model.AbstractModel;
@@ -150,6 +152,14 @@ public class ParserModel extends BaseMod
     super(COMPONENT_NAME, in);
   }
   
+  public ParserModel(File modelFile) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelFile);
+  }
+  
+  public ParserModel(URL modelURL) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelURL);
+  }
+  
   @Override
   protected void createArtifactSerializers(
       Map<String, ArtifactSerializer> serializers) {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java Fri Jul 13 09:06:30 2012
@@ -18,8 +18,10 @@
 
 package opennlp.tools.postag;
 
+import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URL;
 import java.util.Map;
 
 import opennlp.model.AbstractModel;
@@ -79,6 +81,14 @@ public final class POSModel extends Base
   public POSModel(InputStream in) throws IOException, InvalidFormatException {
     super(COMPONENT_NAME, in);
   }
+  
+  public POSModel(File modelFile) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelFile);
+  }
+  
+  public POSModel(URL modelURL) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelURL);
+  }
 
   @Override
   protected Class<? extends BaseToolFactory> getDefaultFactory() {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java Fri Jul 13 09:06:30 2012
@@ -24,6 +24,7 @@ import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URL;
 import java.util.Map;
 
 import opennlp.model.AbstractModel;
@@ -93,6 +94,14 @@ public class SentenceModel extends BaseM
   public SentenceModel(InputStream in) throws IOException, InvalidFormatException {
     super(COMPONENT_NAME, in);
   }
+  
+  public SentenceModel(File modelFile) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelFile);
+  }
+  
+  public SentenceModel(URL modelURL) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelURL);
+  }
 
   @Override
   protected void validateArtifactMap() throws InvalidFormatException {

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java Fri Jul 13 09:06:30 2012
@@ -19,11 +19,13 @@
 package opennlp.tools.tokenize;
 
 import java.io.DataInputStream;
+import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.net.URL;
 import java.util.Map;
 
 import opennlp.maxent.io.BinaryGISModelReader;
@@ -122,6 +124,14 @@ public final class TokenizerModel extend
   public TokenizerModel(InputStream in) throws IOException, InvalidFormatException {
     super(COMPONENT_NAME, in);
   }
+  
+  public TokenizerModel(File modelFile) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelFile);
+  }
+  
+  public TokenizerModel(URL modelURL) throws IOException, InvalidFormatException {
+    super(COMPONENT_NAME, modelURL);
+  }
 
   /**
    * Checks if the tokenizer model has the right outcomes.

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java Fri Jul 13 09:06:30 2012
@@ -18,11 +18,16 @@
 
 package opennlp.tools.util.model;
 
+import java.io.BufferedInputStream;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.net.URI;
+import java.net.URL;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Properties;
@@ -59,7 +64,7 @@ public abstract class BaseModel implemen
   private Map<String, ArtifactSerializer> artifactSerializers =
       new HashMap<String, ArtifactSerializer>();
 
-  protected final Map<String, Object> artifactMap;
+  protected final Map<String, Object> artifactMap = new HashMap<String, Object>();
   
   protected BaseToolFactory toolFactory;
   
@@ -72,19 +77,13 @@ public abstract class BaseModel implemen
   
   private final boolean isLoadedFromSerialized;
 
-  /**
-   * Initializes the current instance. The sub-class constructor should call the
-   * method {@link #checkArtifactMap()} to check the artifact map is OK.
-   * 
-   * @param componentName
-   *          the component name
-   * @param languageCode
-   *          the language code
-   * @param manifestInfoEntries
-   *          additional information in the manifest
-   */
-  protected BaseModel(String componentName, String languageCode, Map<String, String> manifestInfoEntries) {
-    this(componentName, languageCode, manifestInfoEntries, null);
+  private BaseModel(String componentName, boolean isLoadedFromSerialized) {
+    this.isLoadedFromSerialized = isLoadedFromSerialized;
+    
+    if (componentName == null)
+      throw new IllegalArgumentException("componentName must not be null!");
+    
+    this.componentName = componentName;
   }
   
   /**
@@ -106,18 +105,11 @@ public abstract class BaseModel implemen
   protected BaseModel(String componentName, String languageCode,
       Map<String, String> manifestInfoEntries, BaseToolFactory factory) {
 
-    isLoadedFromSerialized = false;
+    this(componentName, false);
 
-    if (componentName == null)
-        throw new IllegalArgumentException("componentName must not be null!");
-    
     if (languageCode == null)
         throw new IllegalArgumentException("languageCode must not be null!");
 
-    this.componentName = componentName;
-    
-    artifactMap = new HashMap<String, Object>();
-    
     createBaseArtifactSerializers(artifactSerializers);
     
     Properties manifest = new Properties();
@@ -157,6 +149,21 @@ public abstract class BaseModel implemen
   }
 
   /**
+   * Initializes the current instance. The sub-class constructor should call the
+   * method {@link #checkArtifactMap()} to check the artifact map is OK.
+   * 
+   * @param componentName
+   *          the component name
+   * @param languageCode
+   *          the language code
+   * @param manifestInfoEntries
+   *          additional information in the manifest
+   */
+  protected BaseModel(String componentName, String languageCode, Map<String, String> manifestInfoEntries) {
+    this(componentName, languageCode, manifestInfoEntries, null);
+  }
+  
+  /**
    * Initializes the current instance.
    * 
    * @param componentName the component name
@@ -166,18 +173,41 @@ public abstract class BaseModel implemen
    * @throws InvalidFormatException
    */
   protected BaseModel(String componentName, InputStream in) throws IOException, InvalidFormatException {
-
-    this.isLoadedFromSerialized = true;
-
-    if (componentName == null)
-      throw new IllegalArgumentException("componentName must not be null!");
+    this(componentName, true);
     
     if (in == null)
         throw new IllegalArgumentException("in must not be null!");
 
-    this.componentName = componentName;
+    loadModel(in);
+  }
+
+  protected BaseModel(String componentName, File modelFile) throws IOException, InvalidFormatException  {
+    this(componentName, true);
     
-    artifactMap = new HashMap<String, Object>();
+    InputStream in = new BufferedInputStream(new FileInputStream(modelFile));
+    
+    try {
+      loadModel(in);
+    }
+    finally {
+      in.close();
+    }
+  }
+
+  protected BaseModel(String componentName, URL modelURL) throws IOException, InvalidFormatException  {
+    this(componentName, true);
+    
+    InputStream in = modelURL.openStream();
+
+    try {
+      loadModel(in);
+    }
+    finally {
+      in.close();
+    }
+  }
+
+  private void loadModel(InputStream in) throws IOException, InvalidFormatException {
     createBaseArtifactSerializers(artifactSerializers);
 
     final ZipInputStream zip = new ZipInputStream(in);
@@ -210,7 +240,7 @@ public abstract class BaseModel implemen
     finishLoadingArtifacts();
     checkArtifactMap();
   }
-
+  
   private void initializeFactory() throws InvalidFormatException {
     String factoryName = getManifestProperty(FACTORY_NAME);
     if (factoryName == null) {