You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/04/16 17:26:46 UTC

[15/50] [abbrv] opennlp git commit: OPENNLP-983: Make suffix/prefix length configurable

OPENNLP-983: Make suffix/prefix length configurable

This closes #121


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/1cd2658d
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/1cd2658d
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/1cd2658d

Branch: refs/heads/parser_regression
Commit: 1cd2658d0179afcf982229fab9c24da62f944c58
Parents: fc3b12f
Author: jzonthemtn <je...@mtnfog.com>
Authored: Mon Feb 13 07:57:21 2017 -0500
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Sun Apr 16 19:24:53 2017 +0200

----------------------------------------------------------------------
 .../tools/util/featuregen/GeneratorFactory.java | 22 ++++-
 .../util/featuregen/PrefixFeatureGenerator.java | 32 +++++--
 .../util/featuregen/SuffixFeatureGenerator.java | 33 +++++--
 .../featuregen/PrefixFeatureGeneratorTest.java  | 92 ++++++++++++++++++++
 .../featuregen/SuffixFeatureGeneratorTest.java  | 92 ++++++++++++++++++++
 5 files changed, 251 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
index fa97f43..ef08cfb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/GeneratorFactory.java
@@ -555,7 +555,16 @@ public class GeneratorFactory {
 
     public AdaptiveFeatureGenerator create(Element generatorElement,
         FeatureGeneratorResourceProvider resourceManager) {
-      return new PrefixFeatureGenerator();
+        
+      String attribute = generatorElement.getAttribute("length");
+        
+      int prefixLength = PrefixFeatureGenerator.DEFAULT_MAX_LENGTH;
+        
+      if (!Objects.equals(attribute, "")) {
+        prefixLength = Integer.parseInt(attribute);
+      }
+        
+      return new PrefixFeatureGenerator(prefixLength);
     }
 
     static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {
@@ -570,7 +579,16 @@ public class GeneratorFactory {
 
     public AdaptiveFeatureGenerator create(Element generatorElement,
         FeatureGeneratorResourceProvider resourceManager) {
-      return new SuffixFeatureGenerator();
+        
+      String attribute = generatorElement.getAttribute("length");
+        
+      int suffixLength = SuffixFeatureGenerator.DEFAULT_MAX_LENGTH;
+        
+      if (!Objects.equals(attribute, "")) {
+        suffixLength = Integer.parseInt(attribute);
+      }
+        
+      return new SuffixFeatureGenerator(suffixLength);
     }
 
     static void register(Map<String, XmlFeatureGeneratorFactory> factoryMap) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
index 8cdd48f..04fcd15 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/PrefixFeatureGenerator.java
@@ -21,21 +21,35 @@ import java.util.List;
 
 public class PrefixFeatureGenerator implements AdaptiveFeatureGenerator {
 
-  private static final int PREFIX_LENGTH = 4;
-
-  private static String[] getPrefixes(String lex) {
-    String[] prefs = new String[PREFIX_LENGTH];
-    for (int li = 0; li < PREFIX_LENGTH; li++) {
-      prefs[li] = lex.substring(0, Math.min(li + 1, lex.length()));
-    }
-    return prefs;
+  static final int DEFAULT_MAX_LENGTH = 4;
+  
+  private final int prefixLength;
+  
+  public PrefixFeatureGenerator() {
+    prefixLength = DEFAULT_MAX_LENGTH;
+  }
+  
+  public PrefixFeatureGenerator(int prefixLength) {
+    this.prefixLength = prefixLength;
   }
 
+  @Override
   public void createFeatures(List<String> features, String[] tokens, int index,
       String[] previousOutcomes) {
-    String[] prefs = PrefixFeatureGenerator.getPrefixes(tokens[index]);
+    String[] prefs = getPrefixes(tokens[index]);
     for (String pref : prefs) {
       features.add("pre=" + pref);
     }
   }
+  
+  private String[] getPrefixes(String lex) {
+      
+    int prefixes = Math.min(prefixLength, lex.length());
+    
+    String[] prefs = new String[prefixes];
+    for (int li = 0; li < prefixes; li++) {
+      prefs[li] = lex.substring(0, Math.min(li + 1, lex.length()));
+    }
+    return prefs;
+  }
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
index a17fd47..c626fd9 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/featuregen/SuffixFeatureGenerator.java
@@ -21,21 +21,36 @@ import java.util.List;
 
 public class SuffixFeatureGenerator implements AdaptiveFeatureGenerator {
 
-  private static final int SUFFIX_LENGTH = 4;
-
-  public static String[] getSuffixes(String lex) {
-    String[] suffs = new String[SUFFIX_LENGTH];
-    for (int li = 0; li < SUFFIX_LENGTH; li++) {
-      suffs[li] = lex.substring(Math.max(lex.length() - li - 1, 0));
-    }
-    return suffs;
+  static final int DEFAULT_MAX_LENGTH = 4;
+    
+  private final int suffixLength;
+  
+  public SuffixFeatureGenerator() {
+    suffixLength = DEFAULT_MAX_LENGTH;
+  }
+  
+  public SuffixFeatureGenerator(int suffixLength) {
+    this.suffixLength = suffixLength;
   }
 
+  @Override
   public void createFeatures(List<String> features, String[] tokens, int index,
       String[] previousOutcomes) {
-    String[] suffs = SuffixFeatureGenerator.getSuffixes(tokens[index]);
+    String[] suffs = getSuffixes(tokens[index]);
     for (String suff : suffs) {
       features.add("suf=" + suff);
     }
   }
+  
+  private String[] getSuffixes(String lex) {
+      
+    int suffixes = Math.min(suffixLength, lex.length());
+      
+    String[] suffs = new String[suffixes];
+    for (int li = 0; li < suffixes; li++) {
+      suffs[li] = lex.substring(Math.max(lex.length() - li - 1, 0));
+    }
+    return suffs;
+  }
+  
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorTest.java
new file mode 100644
index 0000000..5639174
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/PrefixFeatureGeneratorTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class PrefixFeatureGeneratorTest {
+
+  private List<String> features;
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void lengthTest1() {
+      
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+    int testTokenIndex = 0;
+    int suffixLength = 2;
+      
+    AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);    
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+    
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("pre=T", features.get(0));
+    Assert.assertEquals("pre=Th", features.get(1));
+    
+  }
+  
+  @Test
+  public void lengthTest2() {
+      
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+    int testTokenIndex = 3;
+    int suffixLength = 5;
+      
+    AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);    
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+    
+    Assert.assertEquals(5, features.size());
+    Assert.assertEquals("pre=e", features.get(0));
+    Assert.assertEquals("pre=ex", features.get(1));
+    Assert.assertEquals("pre=exa", features.get(2));
+    Assert.assertEquals("pre=exam", features.get(3));
+    Assert.assertEquals("pre=examp", features.get(4));
+    
+  }
+  
+  @Test
+  public void lengthTest3() {
+      
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+    int testTokenIndex = 1;
+    int suffixLength = 5;
+      
+    AdaptiveFeatureGenerator generator = new PrefixFeatureGenerator(suffixLength);    
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+        
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("pre=i", features.get(0));
+    Assert.assertEquals("pre=is", features.get(1));
+    
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/1cd2658d/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorTest.java
new file mode 100644
index 0000000..fcb23a6
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/featuregen/SuffixFeatureGeneratorTest.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.featuregen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class SuffixFeatureGeneratorTest {
+
+  private List<String> features;
+
+  @Before
+  public void setUp() throws Exception {
+    features = new ArrayList<>();
+  }
+
+  @Test
+  public void lengthTest1() {
+      
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+    int testTokenIndex = 0;
+    int suffixLength = 2;
+      
+    AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);    
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("suf=s", features.get(0));
+    Assert.assertEquals("suf=is", features.get(1));
+    
+  }
+  
+  @Test
+  public void lengthTest2() {
+      
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+    int testTokenIndex = 3;
+    int suffixLength = 5;
+      
+    AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);    
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+
+    Assert.assertEquals(5, features.size());
+    Assert.assertEquals("suf=e", features.get(0));
+    Assert.assertEquals("suf=le", features.get(1));
+    Assert.assertEquals("suf=ple", features.get(2));
+    Assert.assertEquals("suf=mple", features.get(3));
+    Assert.assertEquals("suf=ample", features.get(4));
+    
+  }
+  
+  @Test
+  public void lengthTest3() {
+      
+    String[] testSentence = new String[] {"This", "is", "an", "example", "sentence"};
+
+    int testTokenIndex = 1;
+    int suffixLength = 5;
+      
+    AdaptiveFeatureGenerator generator = new SuffixFeatureGenerator(suffixLength);    
+
+    generator.createFeatures(features, testSentence, testTokenIndex, null);
+    
+    Assert.assertEquals(2, features.size());
+    Assert.assertEquals("suf=s", features.get(0));
+    Assert.assertEquals("suf=is", features.get(1));
+    
+  }
+}