You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2012/03/24 01:37:43 UTC

svn commit: r1304686 - /opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/DefaultTokenContextGenerator.java

Author: colen
Date: Sat Mar 24 00:37:42 2012
New Revision: 1304686

URL: http://svn.apache.org/viewvc?rev=1304686&view=rev
Log:
OPENNLP-483: Refactor the DefaultTokenContextGenerator to make it easier to create a sub-class

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/DefaultTokenContextGenerator.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/DefaultTokenContextGenerator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/DefaultTokenContextGenerator.java?rev=1304686&r1=1304685&r2=1304686&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/DefaultTokenContextGenerator.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/tokenize/DefaultTokenContextGenerator.java Sat Mar 24 00:37:42 2012
@@ -30,7 +30,7 @@ import opennlp.tools.util.StringUtil;
  */
 public class DefaultTokenContextGenerator implements TokenContextGenerator {
   
-  private final Set<String> inducedAbbreviations;
+  protected final Set<String> inducedAbbreviations;
   
   /**
    * Creates a default context generator for tokenizer.
@@ -52,6 +52,25 @@ public class DefaultTokenContextGenerato
    * @see opennlp.tools.tokenize.TokenContextGenerator#getContext(java.lang.String, int)
    */
   public String[] getContext(String sentence, int index) {
+    List<String> preds = createContext(sentence, index);
+    String[] context = new String[preds.size()];
+    preds.toArray(context);
+    return context;
+  }
+
+  /**
+   * Returns an {@link ArrayList} of features for the specified sentence string
+   * at the specified index. Extensions of this class can override this method
+   * to create a customized {@link TokenContextGenerator}.
+   * 
+   * @param sentence
+   *          the token being analyzed
+   * @param index
+   *          the index of the character being analyzed
+   * @return an {@link ArrayList} of features for the specified sentence string
+   *         at the specified index.
+   */
+  protected List<String> createContext(String sentence, int index) {
     List<String> preds = new ArrayList<String>();
     String prefix = sentence.substring(0, index);
     String suffix = sentence.substring(index);
@@ -91,16 +110,14 @@ public class DefaultTokenContextGenerato
       preds.add("abb");
     }
 
-    String[] context = new String[preds.size()];
-    preds.toArray(context);
-    return context;
+    return preds;
   }
 
 
   /**
    * Helper function for getContext.
    */
-  private void addCharPreds(String key, char c, List<String> preds) {
+  protected void addCharPreds(String key, char c, List<String> preds) {
     preds.add(key + "=" + c);
     if (Character.isLetter(c)) {
       preds.add(key + "_alpha");