You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/03/12 15:03:15 UTC

svn commit: r1576746 - in /opennlp/trunk/opennlp-tools/src: main/java/opennlp/tools/cmdline/namefind/ main/java/opennlp/tools/util/featuregen/ test/java/opennlp/tools/util/featuregen/

Author: joern
Date: Wed Mar 12 14:03:14 2014
New Revision: 1576746

URL: http://svn.apache.org/r1576746
Log:
OPENNLP-605 Custom Feature Generators are now configured properly and return instantiated Artifact Serializers instead of artifact classes.

Added:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java
Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/ArtifactToSerializerMapper.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java
    opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java?rev=1576746&r1=1576745&r2=1576746&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java Wed Mar 12 14:03:14 2014
@@ -67,7 +67,7 @@ public final class TokenNameFinderCrossV
         TokenNameFinderTrainerTool.openFeatureGeneratorBytes(params.getFeaturegen());
 
     Map<String, Object> resources =
-        TokenNameFinderTrainerTool.loadResources(params.getResources());
+        TokenNameFinderTrainerTool.loadResources(params.getResources(), params.getFeaturegen());
 
     if (params.getNameTypes() != null) {
       String nameTypes[] = params.getNameTypes().split(",");

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java?rev=1576746&r1=1576745&r2=1576746&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java Wed Mar 12 14:03:14 2014
@@ -37,6 +37,7 @@ import opennlp.tools.namefind.TokenNameF
 import opennlp.tools.postag.POSTaggerFactory;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.SequenceCodec;
+import opennlp.tools.util.featuregen.GeneratorFactory;
 import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.ModelUtil;
 
@@ -84,7 +85,7 @@ public final class TokenNameFinderTraine
     return featureGeneratorBytes;
   }
   
-  public static Map<String, Object> loadResources(File resourcePath) {
+  public static Map<String, Object> loadResources(File resourcePath, File featureGenDescriptor) {
     Map<String, Object> resources = new HashMap<String, Object>();
 
     if (resourcePath != null) {
@@ -92,6 +93,20 @@ public final class TokenNameFinderTraine
       Map<String, ArtifactSerializer> artifactSerializers = TokenNameFinderModel
           .createArtifactSerializers();
 
+      
+      // TODO: If there is a descriptor file, it should be consulted too
+      if (featureGenDescriptor != null) {
+        
+        InputStream xmlDescriptorIn = null;
+        
+        try {
+          artifactSerializers.putAll(GeneratorFactory.extractCustomArtifactSerializerMappings(xmlDescriptorIn));
+        } catch (IOException e) {
+          // TODO: Improve error handling!
+          e.printStackTrace();
+        }
+      }
+      
       File resourceFiles[] = resourcePath.listFiles();
 
       // TODO: Filter files, also files with start with a dot
@@ -139,11 +154,12 @@ public final class TokenNameFinderTraine
     return resources;
   }
   
-  static Map<String, Object> loadResources(String resourceDirectory) {
+  static Map<String, Object> loadResources(String resourceDirectory, File featureGeneratorDescriptor) {
 
     if (resourceDirectory != null) {
       File resourcePath = new File(resourceDirectory);
-      return loadResources(resourcePath);
+      
+      return loadResources(resourcePath, featureGeneratorDescriptor);
     }
 
     return new HashMap<String, Object>();
@@ -166,7 +182,7 @@ public final class TokenNameFinderTraine
     //       Must be loaded into memory, or written to tmp file until descriptor 
     //       is loaded which defines parses when model is loaded
     
-    Map<String, Object> resources = loadResources(params.getResources());
+    Map<String, Object> resources = loadResources(params.getResources(), params.getFeaturegen());
         
     CmdLineUtil.checkOutputFile("name finder model", modelOutFile);
 

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/ArtifactToSerializerMapper.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/ArtifactToSerializerMapper.java?rev=1576746&r1=1576745&r2=1576746&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/ArtifactToSerializerMapper.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/ArtifactToSerializerMapper.java Wed Mar 12 14:03:14 2014
@@ -19,8 +19,8 @@ package opennlp.tools.util.featuregen;
 
 import java.util.Map;
 
-import opennlp.tools.util.model.SerializableArtifact;
+import opennlp.tools.util.model.ArtifactSerializer;
 
 public interface ArtifactToSerializerMapper {
-  Map<String, Class<? extends SerializableArtifact>> getArtifactSerializerMapping();
+  Map<String, ArtifactSerializer<?>> getArtifactSerializerMapping();
 }

Added: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java?rev=1576746&view=auto
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java (added)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/CustomFeatureGenerator.java Wed Mar 12 14:03:14 2014
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.Map;
+
+import opennlp.tools.util.InvalidFormatException;
+
+public abstract class CustomFeatureGenerator implements AdaptiveFeatureGenerator {
+  
+  /**
+   * Initializes the Custom Feature Generator with defined properties and loaded resources.
+   * 
+   * @param properties
+   * @param resourceProvider
+   */
+  public abstract void init(Map<String, String> properties, FeatureGeneratorResourceProvider resourceProvider)
+        throws InvalidFormatException;
+}

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java?rev=1576746&r1=1576745&r2=1576746&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java Wed Mar 12 14:03:14 2014
@@ -37,9 +37,11 @@ import javax.xml.xpath.XPathFactory;
 import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.ext.ExtensionLoader;
+import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.SerializableArtifact;
 
 import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 import org.xml.sax.SAXException;
@@ -102,8 +104,6 @@ public class GeneratorFactory {
      */
     AdaptiveFeatureGenerator create(Element generatorElement,
         FeatureGeneratorResourceProvider resourceManager) throws InvalidFormatException;
-    
-    //
   }
 
   /**
@@ -503,7 +503,25 @@ public class GeneratorFactory {
       AdaptiveFeatureGenerator generator = ExtensionLoader.instantiateExtension(AdaptiveFeatureGenerator.class,
           featureGeneratorClassName);
       
-      // TODO: User could define artifact mappings ...
+      if (generator instanceof CustomFeatureGenerator) {
+        
+        CustomFeatureGenerator customGenerator = (CustomFeatureGenerator) generator;
+        
+        Map<String, String> properties = new HashMap<>();
+
+        NamedNodeMap attributes = generatorElement.getAttributes();
+        
+        for (int i = 0; i < attributes.getLength(); i++) {
+          Node attribute = attributes.item(i);
+          if (!"class".equals(attribute.getNodeName())) {
+            properties.put(attribute.getNodeName(), attribute.getNodeValue());
+          }
+        }
+        
+        if (resourceManager != null) {
+          customGenerator.init(properties, resourceManager);
+        }
+      }
       
       return generator;
     }
@@ -612,11 +630,11 @@ public class GeneratorFactory {
     return createGenerator(generatorElement, resourceManager);
   }
   
-  public static Map<String, Class<? extends SerializableArtifact>> extractCustomArtifactSerializerMappings(
-      InputStream xmlDescriptorIn, FeatureGeneratorResourceProvider resourceManager)
+  public static Map<String, ArtifactSerializer<?>> extractCustomArtifactSerializerMappings(
+      InputStream xmlDescriptorIn)
       throws IOException, InvalidFormatException {
     
-    Map<String, Class<? extends SerializableArtifact>> mapping = new HashMap<>();
+    Map<String, ArtifactSerializer<?>> mapping = new HashMap<>();
     
     org.w3c.dom.Document xmlDescriptorDOM = createDOM(xmlDescriptorIn);
     
@@ -634,14 +652,15 @@ public class GeneratorFactory {
       if (customElements.item(i) instanceof Element) {
         Element customElement = (Element) customElements.item(i);
         
-        AdaptiveFeatureGenerator generator = createGenerator(customElement, resourceManager);
+        // Note: The resource provider is not available at that point, to provide
+        // resources they need to be loaded first!
+        AdaptiveFeatureGenerator generator = createGenerator(customElement, null);
         
         if (generator instanceof ArtifactToSerializerMapper) {
           ArtifactToSerializerMapper mapper = (ArtifactToSerializerMapper) generator;
           mapping.putAll(mapper.getArtifactSerializerMapping());
         }
       }
-      
     }
     
     return mapping;

Modified: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java?rev=1576746&r1=1576745&r2=1576746&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java (original)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/FeatureGenWithSerializerMapping.java Wed Mar 12 14:03:14 2014
@@ -22,9 +22,11 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import opennlp.tools.util.model.SerializableArtifact;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.model.ArtifactSerializer;
 
-public class FeatureGenWithSerializerMapping implements AdaptiveFeatureGenerator, ArtifactToSerializerMapper {
+public class FeatureGenWithSerializerMapping extends CustomFeatureGenerator 
+  implements ArtifactToSerializerMapper {
 
   @Override
   public void createFeatures(List<String> features, String[] tokens, int index,
@@ -40,9 +42,15 @@ public class FeatureGenWithSerializerMap
   }
 
   @Override
-  public Map<String, Class<? extends SerializableArtifact>> getArtifactSerializerMapping() {
-    Map<String, Class<? extends SerializableArtifact>> mapping = new HashMap<>();
-    mapping.put("test.resource", W2VClassesDictionary.class);
+  public Map<String, ArtifactSerializer<?>> getArtifactSerializerMapping() {
+    Map<String, ArtifactSerializer<?>> mapping = new HashMap<>();
+    mapping.put("test.resource", new W2VClassesDictionary.W2VClassesDictionarySerializer());
     return Collections.unmodifiableMap(mapping);
   }
+
+  @Override
+  public void init(Map<String, String> properties,
+      FeatureGeneratorResourceProvider resourceProvider)
+      throws InvalidFormatException {
+  }
 }

Modified: opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java?rev=1576746&r1=1576745&r2=1576746&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java (original)
+++ opennlp/trunk/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/GeneratorFactoryTest.java Wed Mar 12 14:03:14 2014
@@ -18,7 +18,7 @@
 
 package opennlp.tools.util.featuregen;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.*;
 import static org.junit.Assert.assertNotNull;
 
 import java.io.IOException;
@@ -29,6 +29,7 @@ import java.util.Map;
 
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.featuregen.W2VClassesDictionary.W2VClassesDictionarySerializer;
+import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.SerializableArtifact;
 
 import org.junit.Test;
@@ -109,9 +110,9 @@ public class GeneratorFactoryTest {
     InputStream descIn = getClass().getResourceAsStream(
         "/opennlp/tools/util/featuregen/CustomClassLoadingWithSerializers.xml");
     
-    Map<String, Class<? extends SerializableArtifact>> mapping =
-        GeneratorFactory.extractCustomArtifactSerializerMappings(descIn, null);
+    Map<String, ArtifactSerializer<?>> mapping =
+        GeneratorFactory.extractCustomArtifactSerializerMappings(descIn);
     
-    assertEquals(W2VClassesDictionary.class, mapping.get("test.resource"));
+    assertTrue(mapping.get("test.resource") instanceof W2VClassesDictionarySerializer);
   }
 }
\ No newline at end of file