You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/05/30 10:28:16 UTC

svn commit: r1129054 - /incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java

Author: joern
Date: Mon May 30 08:28:15 2011
New Revision: 1129054

URL: http://svn.apache.org/viewvc?rev=1129054&view=rev
Log:
OPENNLP-127 When a model is loaded, it is now checked that the tag dictionary is compatible with the model, i.e. that it only contains tags which are known to the model

Modified:
    incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java

Modified: incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java?rev=1129054&r1=1129053&r2=1129054&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java (original)
+++ incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/postag/POSModel.java Mon May 30 08:28:15 2011
@@ -24,7 +24,10 @@ import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.util.Collections;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
 
 import opennlp.model.AbstractModel;
 import opennlp.model.GenericModelReader;
@@ -116,8 +119,32 @@ public final class POSModel extends Base
 
     Object tagdictEntry = artifactMap.get(TAG_DICTIONARY_ENTRY_NAME);
 
-    if (tagdictEntry != null && !(tagdictEntry instanceof POSDictionary)) {
-      throw new InvalidFormatException("Abbreviations dictionary has wrong type!");
+    if (tagdictEntry != null) {
+      if (tagdictEntry instanceof POSDictionary) {
+        POSDictionary posDict = (POSDictionary) tagdictEntry;
+        
+        Set<String> dictTags = new HashSet<String>();
+        
+        for (String word : posDict) {
+          Collections.addAll(dictTags, posDict.getTags(word)); 
+        }
+        
+        Set<String> modelTags = new HashSet<String>();
+        
+        AbstractModel posModel = getPosModel();
+        
+        for  (int i = 0; i < posModel.getNumOutcomes(); i++) {
+          modelTags.add(posModel.getOutcome(i));
+        }
+        
+        if (!modelTags.containsAll(dictTags)) {
+          throw new InvalidFormatException("Tag dictioinary contains tags " +
+          		"which are unkown by the model!");
+        }
+      }
+      else {
+        throw new InvalidFormatException("Abbreviations dictionary has wrong type!");
+      }
     }
 
     Object ngramDictEntry = artifactMap.get(NGRAM_DICTIONARY_ENTRY_NAME);