You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/05/30 10:28:16 UTC
svn commit: r1129054 -
/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
Author: joern
Date: Mon May 30 08:28:15 2011
New Revision: 1129054
URL: http://svn.apache.org/viewvc?rev=1129054&view=rev
Log:
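OPENNLP-127 When a model is loaded, it is now checked that the tag dictionary is compatible with the model, i.e. that every tag in the dictionary is a known outcome of the model.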
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java?rev=1129054&r1=1129053&r2=1129054&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java Mon May 30 08:28:15 2011
@@ -24,7 +24,10 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.util.Collections;
+import java.util.HashSet;
import java.util.Map;
+import java.util.Set;
import opennlp.model.AbstractModel;
import opennlp.model.GenericModelReader;
@@ -116,8 +119,32 @@ public final class POSModel extends Base
Object tagdictEntry = artifactMap.get(TAG_DICTIONARY_ENTRY_NAME);
- if (tagdictEntry != null && !(tagdictEntry instanceof POSDictionary)) {
- throw new InvalidFormatException("Abbreviations dictionary has wrong type!");
+ if (tagdictEntry != null) {
+ if (tagdictEntry instanceof POSDictionary) {
+ POSDictionary posDict = (POSDictionary) tagdictEntry;
+
+ Set<String> dictTags = new HashSet<String>();
+
+ for (String word : posDict) {
+ Collections.addAll(dictTags, posDict.getTags(word));
+ }
+
+ Set<String> modelTags = new HashSet<String>();
+
+ AbstractModel posModel = getPosModel();
+
+ for (int i = 0; i < posModel.getNumOutcomes(); i++) {
+ modelTags.add(posModel.getOutcome(i));
+ }
+
+ if (!modelTags.containsAll(dictTags)) {
+ throw new InvalidFormatException("Tag dictioinary contains tags " +
+ "which are unkown by the model!");
+ }
+ }
+ else {
+ throw new InvalidFormatException("Abbreviations dictionary has wrong type!");
+ }
}
Object ngramDictEntry = artifactMap.get(NGRAM_DICTIONARY_ENTRY_NAME);