You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/07/13 11:06:31 UTC
svn commit: r1361114 - in /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools: chunker/ doccat/ namefind/ parser/ postag/ sentdetect/ tokenize/ util/model/
Author: joern
Date: Fri Jul 13 09:06:30 2012
New Revision: 1361114
URL: http://svn.apache.org/viewvc?rev=1361114&view=rev
Log:
OPENNLP-505 Added constructors which load the model from a File or URL object.
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java Fri Jul 13 09:06:30 2012
@@ -18,11 +18,13 @@
package opennlp.tools.chunker;
+import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URL;
import java.util.Map;
import opennlp.model.AbstractModel;
@@ -59,6 +61,14 @@ public class ChunkerModel extends BaseMo
super(COMPONENT_NAME, in);
}
+ public ChunkerModel(File modelFile) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelFile);
+ }
+
+ public ChunkerModel(URL modelURL) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelURL);
+ }
+
@Override
protected void validateArtifactMap() throws InvalidFormatException {
super.validateArtifactMap();
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/doccat/DoccatModel.java Fri Jul 13 09:06:30 2012
@@ -17,8 +17,10 @@
package opennlp.tools.doccat;
+import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URL;
import java.util.Map;
import opennlp.model.AbstractModel;
@@ -45,6 +47,14 @@ public class DoccatModel extends BaseMod
public DoccatModel(InputStream in) throws IOException, InvalidFormatException {
super(COMPONENT_NAME, in);
}
+
+ public DoccatModel(File modelFile) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelFile);
+ }
+
+ public DoccatModel(URL modelURL) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelURL);
+ }
@Override
protected void validateArtifactMap() throws InvalidFormatException {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java Fri Jul 13 09:06:30 2012
@@ -19,9 +19,11 @@
package opennlp.tools.namefind;
import java.io.ByteArrayInputStream;
+import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@@ -108,6 +110,15 @@ public class TokenNameFinderModel extend
super(COMPONENT_NAME, in);
}
+ public TokenNameFinderModel(File modelFile) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelFile);
+ }
+
+ public TokenNameFinderModel(URL modelURL) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelURL);
+ }
+
+
/**
* Retrieves the {@link TokenNameFinder} model.
*
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/parser/ParserModel.java Fri Jul 13 09:06:30 2012
@@ -19,11 +19,13 @@
package opennlp.tools.parser;
import java.io.BufferedReader;
+import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
+import java.net.URL;
import java.util.Map;
import opennlp.model.AbstractModel;
@@ -150,6 +152,14 @@ public class ParserModel extends BaseMod
super(COMPONENT_NAME, in);
}
+ public ParserModel(File modelFile) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelFile);
+ }
+
+ public ParserModel(URL modelURL) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelURL);
+ }
+
@Override
protected void createArtifactSerializers(
Map<String, ArtifactSerializer> serializers) {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java Fri Jul 13 09:06:30 2012
@@ -18,8 +18,10 @@
package opennlp.tools.postag;
+import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URL;
import java.util.Map;
import opennlp.model.AbstractModel;
@@ -79,6 +81,14 @@ public final class POSModel extends Base
public POSModel(InputStream in) throws IOException, InvalidFormatException {
super(COMPONENT_NAME, in);
}
+
+ public POSModel(File modelFile) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelFile);
+ }
+
+ public POSModel(URL modelURL) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelURL);
+ }
@Override
protected Class<? extends BaseToolFactory> getDefaultFactory() {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceModel.java Fri Jul 13 09:06:30 2012
@@ -24,6 +24,7 @@ import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URL;
import java.util.Map;
import opennlp.model.AbstractModel;
@@ -93,6 +94,14 @@ public class SentenceModel extends BaseM
public SentenceModel(InputStream in) throws IOException, InvalidFormatException {
super(COMPONENT_NAME, in);
}
+
+ public SentenceModel(File modelFile) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelFile);
+ }
+
+ public SentenceModel(URL modelURL) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelURL);
+ }
@Override
protected void validateArtifactMap() throws InvalidFormatException {
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerModel.java Fri Jul 13 09:06:30 2012
@@ -19,11 +19,13 @@
package opennlp.tools.tokenize;
import java.io.DataInputStream;
+import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.net.URL;
import java.util.Map;
import opennlp.maxent.io.BinaryGISModelReader;
@@ -122,6 +124,14 @@ public final class TokenizerModel extend
public TokenizerModel(InputStream in) throws IOException, InvalidFormatException {
super(COMPONENT_NAME, in);
}
+
+ public TokenizerModel(File modelFile) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelFile);
+ }
+
+ public TokenizerModel(URL modelURL) throws IOException, InvalidFormatException {
+ super(COMPONENT_NAME, modelURL);
+ }
/**
* Checks if the tokenizer model has the right outcomes.
Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java?rev=1361114&r1=1361113&r2=1361114&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java Fri Jul 13 09:06:30 2012
@@ -18,11 +18,16 @@
package opennlp.tools.util.model;
+import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.net.URI;
+import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
@@ -59,7 +64,7 @@ public abstract class BaseModel implemen
private Map<String, ArtifactSerializer> artifactSerializers =
new HashMap<String, ArtifactSerializer>();
- protected final Map<String, Object> artifactMap;
+ protected final Map<String, Object> artifactMap = new HashMap<String, Object>();
protected BaseToolFactory toolFactory;
@@ -72,19 +77,13 @@ public abstract class BaseModel implemen
private final boolean isLoadedFromSerialized;
- /**
- * Initializes the current instance. The sub-class constructor should call the
- * method {@link #checkArtifactMap()} to check the artifact map is OK.
- *
- * @param componentName
- * the component name
- * @param languageCode
- * the language code
- * @param manifestInfoEntries
- * additional information in the manifest
- */
- protected BaseModel(String componentName, String languageCode, Map<String, String> manifestInfoEntries) {
- this(componentName, languageCode, manifestInfoEntries, null);
+ private BaseModel(String componentName, boolean isLoadedFromSerialized) {
+ this.isLoadedFromSerialized = isLoadedFromSerialized;
+
+ if (componentName == null)
+ throw new IllegalArgumentException("componentName must not be null!");
+
+ this.componentName = componentName;
}
/**
@@ -106,18 +105,11 @@ public abstract class BaseModel implemen
protected BaseModel(String componentName, String languageCode,
Map<String, String> manifestInfoEntries, BaseToolFactory factory) {
- isLoadedFromSerialized = false;
+ this(componentName, false);
- if (componentName == null)
- throw new IllegalArgumentException("componentName must not be null!");
-
if (languageCode == null)
throw new IllegalArgumentException("languageCode must not be null!");
- this.componentName = componentName;
-
- artifactMap = new HashMap<String, Object>();
-
createBaseArtifactSerializers(artifactSerializers);
Properties manifest = new Properties();
@@ -157,6 +149,21 @@ public abstract class BaseModel implemen
}
/**
+ * Initializes the current instance. The sub-class constructor should call the
+ * method {@link #checkArtifactMap()} to check the artifact map is OK.
+ *
+ * @param componentName
+ * the component name
+ * @param languageCode
+ * the language code
+ * @param manifestInfoEntries
+ * additional information in the manifest
+ */
+ protected BaseModel(String componentName, String languageCode, Map<String, String> manifestInfoEntries) {
+ this(componentName, languageCode, manifestInfoEntries, null);
+ }
+
+ /**
* Initializes the current instance.
*
* @param componentName the component name
@@ -166,18 +173,41 @@ public abstract class BaseModel implemen
* @throws InvalidFormatException
*/
protected BaseModel(String componentName, InputStream in) throws IOException, InvalidFormatException {
-
- this.isLoadedFromSerialized = true;
-
- if (componentName == null)
- throw new IllegalArgumentException("componentName must not be null!");
+ this(componentName, true);
if (in == null)
throw new IllegalArgumentException("in must not be null!");
- this.componentName = componentName;
+ loadModel(in);
+ }
+
+ protected BaseModel(String componentName, File modelFile) throws IOException, InvalidFormatException {
+ this(componentName, true);
- artifactMap = new HashMap<String, Object>();
+ InputStream in = new BufferedInputStream(new FileInputStream(modelFile));
+
+ try {
+ loadModel(in);
+ }
+ finally {
+ in.close();
+ }
+ }
+
+ protected BaseModel(String componentName, URL modelURL) throws IOException, InvalidFormatException {
+ this(componentName, true);
+
+ InputStream in = modelURL.openStream();
+
+ try {
+ loadModel(in);
+ }
+ finally {
+ in.close();
+ }
+ }
+
+ private void loadModel(InputStream in) throws IOException, InvalidFormatException {
createBaseArtifactSerializers(artifactSerializers);
final ZipInputStream zip = new ZipInputStream(in);
@@ -210,7 +240,7 @@ public abstract class BaseModel implemen
finishLoadingArtifacts();
checkArtifactMap();
}
-
+
private void initializeFactory() throws InvalidFormatException {
String factoryName = getManifestProperty(FACTORY_NAME);
if (factoryName == null) {