You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by al...@apache.org on 2006/12/07 23:40:30 UTC
svn commit: r483709 [5/5] - in /incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src: com/ibm/uima/examples/opennlp/ com/ibm/uima/examples/opennlp/annotator/ org/ org/apache/ org/apache/uima/ org/apache/uima/examples/ org/apache/uima...

Modified: incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src/org/apache/uima/examples/opennlp/annotator/Parser.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src/org/apache/uima/examples/opennlp/annotator/Parser.java?view=diff&rev=483709&r1=477887&r2=483709
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src/org/apache/uima/examples/opennlp/annotator/Parser.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src/org/apache/uima/examples/opennlp/annotator/Parser.java Thu Dec  7 14:40:23 2006
@@ -30,394 +30,368 @@
 import opennlp.tools.parser.ParserME;
 import opennlp.tools.util.Span;
 
-import org.apache.uima.analysis_engine.ResultSpecification;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
-import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
-import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
 import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
-import org.apache.uima.analysis_engine.annotator.JTextAnnotator_ImplBase;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.cas.text.AnnotationIndex;
 import org.apache.uima.examples.opennlp.Sentence;
 import org.apache.uima.examples.opennlp.SyntaxAnnotation;
 import org.apache.uima.examples.opennlp.Token;
 import org.apache.uima.jcas.impl.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
 
 /**
- * UIMA Analysis Engine that invokes the OpenNLP Parser. The OpenNLP Parser
- * generates PennTreeBank style syntax tags. These tags are mapped into
- * annotation types according to the tag mapping table (MAPPINGS_PARAM)
- * parameter and corresponding annotations are created in the CAS. The directory
- * containing the various model files used by the OpenNLP Parser must also be
- * specified as a parameter (MODEL_DIR_PARAM).
+ * UIMA Analysis Engine that invokes the OpenNLP Parser. The OpenNLP Parser generates PennTreeBank
+ * style syntax tags. These tags are mapped into annotation types according to the tag mapping table
+ * (MAPPINGS_PARAM) parameter and corresponding annotations are created in the CAS. The directory
+ * containing the various model files used by the OpenNLP Parser must also be specified as a
+ * parameter (MODEL_DIR_PARAM).
  * 
  */
-public class Parser extends JTextAnnotator_ImplBase {
+public class Parser extends JCasAnnotator_ImplBase {
 
-    /** Parse tag mappings array parameter name. */
-    private static final String MAPPINGS_PARAM = "ParseTagMappings";
+  /** Parse tag mappings array parameter name. */
+  private static final String MAPPINGS_PARAM = "ParseTagMappings";
 
-    /** Model directory parameter name. */
-    private static final String MODEL_DIR_PARAM = "ModelDirectory";
+  /** Model directory parameter name. */
+  private static final String MODEL_DIR_PARAM = "ModelDirectory";
 
-    /** Use tag dictionary flag parameter name. */
-    private static final String USE_TAG_DICT_PARAM = "UseTagDictionary";
-
-    /** Case sensitive tag dictionary flag parameter name. */
-    private static final String CASE_INSESNITIVE_TD_PARAM = "CaseSensitiveTagDictionary";
-
-    /** Beam size paramter name. */
-    private static final String BEAM_SIZE_PARAM = "BeamSize";
-
-    /** Advance percentage parameter name. */
-    private static final String ADV_PERCENT_PARAM = "AdvancePercentage";
-
-    /** Name to use for this Analysis Engine component. */
-    private static final String COMPONENT_NAME = "OpenNLP Parser";
-
-    /** The OpenNLP parser */
-    private ParserME parser;
-
-    /**
-     * Hashtable for characters that must be escaped because they have special
-     * meaning for the parser.
-     */
-    private Hashtable escapeMap = new Hashtable();
+  /** Use tag dictionary flag parameter name. */
+  private static final String USE_TAG_DICT_PARAM = "UseTagDictionary";
+
+  /** Case sensitive tag dictionary flag parameter name. */
+  private static final String CASE_INSESNITIVE_TD_PARAM = "CaseSensitiveTagDictionary";
+
+  /** Beam size parameter name. */
+  private static final String BEAM_SIZE_PARAM = "BeamSize";
+
+  /** Advance percentage parameter name. */
+  private static final String ADV_PERCENT_PARAM = "AdvancePercentage";
+
+  /** Name to use for this Analysis Engine component. */
+  private static final String COMPONENT_NAME = "OpenNLP Parser";
+
+  /** The OpenNLP parser */
+  private ParserME parser;
+
+  /**
+   * Hashtable for characters that must be escaped because they have special meaning for the parser.
+   */
+  private Hashtable escapeMap = new Hashtable();
+
+  /**
+   * Table to keep track of span offsets when characters are escaped. Required to properly set spans
+   * in parse annotations.
+   */
+  private OffsetMap offsetMap = new OffsetMap();
+
+  /**
+   * Hash that maps parse tags to the constructor for the corresponding annotation type class.
+   */
+  private Hashtable parseTagMap = new Hashtable();
+
+  /**
+   * Initialize the Annotator.
+   * 
+   * @see JCasAnnotator_ImplBase#initialize(UimaContext)
+   */
+  public void initialize(UimaContext aContext) throws ResourceInitializationException {
+    super.initialize(aContext);
+
+    try {
+      String[] mappingStrings = null;
+
+      mappingStrings = (String[]) aContext.getConfigParameterValue(MAPPINGS_PARAM);
+      if (mappingStrings == null) {
+        throw new AnnotatorConfigurationException();
+      }
+      loadMappings(mappingStrings);
+
+      String modelDirName = (String) aContext.getConfigParameterValue(MODEL_DIR_PARAM);
+
+      File modelDir = new File(modelDirName);
+      if (!modelDir.isDirectory()) {
+        throw new AnnotatorConfigurationException();
+      }
+
+      // set parameter defaults
+      boolean useTagDictionary = false;
+      boolean caseSensitiveTagDictionary = false;
+      int beamSize = ParserME.defaultBeamSize;
+      double advancePercentage = ParserME.defaultAdvancePercentage;
+
+      Boolean useTagDictP = (Boolean) aContext.getConfigParameterValue(USE_TAG_DICT_PARAM);
+      if (useTagDictP != null)
+        useTagDictionary = useTagDictP.booleanValue();
+      Boolean caseSensitiveTagDictP = (Boolean) aContext
+              .getConfigParameterValue(CASE_INSESNITIVE_TD_PARAM);
+      if (caseSensitiveTagDictP != null)
+        caseSensitiveTagDictionary = caseSensitiveTagDictP.booleanValue();
+      Integer beamSizeInt = (Integer) aContext.getConfigParameterValue(BEAM_SIZE_PARAM);
+      if (beamSizeInt != null)
+        beamSize = beamSizeInt.intValue();
+      Float advPercentFlt = (Float) aContext.getConfigParameterValue(ADV_PERCENT_PARAM);
+      if (advPercentFlt != null)
+        advancePercentage = advPercentFlt.doubleValue();
+
+      parser = TreebankParser.getParser(modelDirName, useTagDictionary, caseSensitiveTagDictionary,
+              beamSize, advancePercentage);
+    } catch (Exception e) {
+      throw new ResourceInitializationException(e);
+    }
+    initEscapeMap();
+  }
 
-    /**
-     * Table to keep track of span offsets when characters are escaped. Required to
-     * properly set spans in parse annotations.
-     */
-    private OffsetMap offsetMap = new OffsetMap();
+  /**
+   * Processes the parse tag mappings parameter. The constructor for each class identified in the
+   * array is loaded and stored in the mapping hashtable, using the label provided in the parameter
+   * as the key.
+   * 
+   * @param mappingStrings
+   *          Array of mapping strings of the form "tag,class"
+   * @throws AnnotatorConfigurationException if a mapping is malformed or its class fails to load
+   */
+  private void loadMappings(String[] mappingStrings) throws AnnotatorConfigurationException {
+    // populate the mappings hash table (key: parse tag,CAS Annotation Type
+    // Constructor)
+    for (int i = 0; i < mappingStrings.length; i++) {
+      String[] mappingPair = mappingStrings[i].split(",");
+      if (mappingPair.length < 2)
+        throw new AnnotatorConfigurationException();
+
+      String parseTag = mappingPair[0];
+      String className = mappingPair[1];
+
+      Constructor annotationConstructor;
+      // get the name of the JCAS type with this name
+      Class annotationClass;
+      try {
+        annotationClass = Class.forName(className);
+        // get the constructor for that JCAS type
+        annotationConstructor = annotationClass.getConstructor(new Class[] { JCas.class });
+      } catch (Exception e) {
+        throw new AnnotatorConfigurationException(e);
+      }
+      parseTagMap.put(parseTag, annotationConstructor);
+    }
+  }
 
-    /**
-     * Hash that maps parse tags to the constructor for the corresponding
-     * annotation type class.
-     */
-    private Hashtable parseTagMap = new Hashtable();
+  /**
+   * Process a CAS.
+   * 
+   * @see JCasAnnotator_ImplBase#process(JCas)
+   */
+  public void process(JCas aJCas) throws AnalysisEngineProcessException {
+
+    ArrayList wordList = new ArrayList();
+    StringBuffer sentenceBuffer = new StringBuffer();
+    offsetMap.clear();
+
+    AnnotationIndex sentenceIndex = (AnnotationIndex) aJCas.getJFSIndexRepository()
+            .getAnnotationIndex(Sentence.type);
+    AnnotationIndex tokenIndex = (AnnotationIndex) aJCas.getJFSIndexRepository()
+            .getAnnotationIndex(Token.type);
+
+    // iterate over Sentences
+    FSIterator sentenceIterator = sentenceIndex.iterator();
+    while (sentenceIterator.hasNext()) {
+      Sentence sentence = (Sentence) sentenceIterator.next();
+
+      wordList.clear();
+      sentenceBuffer.setLength(0);
+
+      int mapIdx = 0;
+
+      // iterate over Tokens
+      FSIterator tokenIterator = tokenIndex.subiterator(sentence);
+      while (tokenIterator.hasNext()) {
+        Token token = (Token) tokenIterator.next();
+
+        String word = escapeToken(token.getCoveredText());
+
+        int start = sentenceBuffer.length();
+        int end = start + word.length();
+
+        int origIdx = token.getBegin();
+        for (mapIdx = start; mapIdx <= end; mapIdx++) {
+          offsetMap.putMapping(mapIdx, origIdx);
+          if (origIdx < token.getEnd())
+            origIdx++;
+        }
 
-    /*
-     * (non-Javadoc)
-     * 
-     * @see org.apache.uima.analysis_engine.annotator.BaseAnnotator#initialize(org.apache.uima.analysis_engine.annotator.AnnotatorContext)
-     */
-    public void initialize(AnnotatorContext aContext)
-            throws AnnotatorInitializationException,
-            AnnotatorConfigurationException {
-        super.initialize(aContext);
+        sentenceBuffer.append(word + " ");
+        wordList.add(word);
+      }
+
+      if (sentenceBuffer.length() == 0) // check for empty sentence
+        continue;
+
+      String sentenceText = sentenceBuffer.substring(0, sentenceBuffer.length() - 1);
+
+      Parse parse = new Parse(sentenceText, new Span(0, sentenceText.length()), "INC", 1, null);
+
+      int tokenStart = 0;
+      int tokenEnd = 0;
+      Iterator wordIterator = wordList.iterator();
+      while (wordIterator.hasNext()) {
+        String word = (String) wordIterator.next();
+        tokenEnd = tokenStart + word.length();
+        parse.insert(new Parse(sentenceText, new Span(tokenStart, tokenEnd), ParserME.TOK_NODE, 0));
+        tokenStart = tokenEnd + 1; // advance past space
+      }
+      parse = parser.parse(parse);
+
+      makeAnnotations(parse, aJCas);
+
+      // parse.show();
+      // System.out.println("");
+      // System.out.println(show(parse));
+    }
+  }
 
+  /**
+   * Initializes the table of characters that must be "escaped". These characters have special
+   * meaning to the parser, so they are replaced with a special string, which is understood by the
+   * parser to represent that character.
+   */
+  private void initEscapeMap() {
+    escapeMap.put("(", "-LRB-");
+    escapeMap.put(")", "-RRB-");
+    escapeMap.put("{", "-LCB-");
+    escapeMap.put("}", "-RCB-");
+    escapeMap.put("[", "-LSB-");
+    escapeMap.put("]", "-RSB-");
+  }
+
+  /**
+   * Escape the input token, if necessary. Consult the EscapeMap to see if the input token is a
+   * character that must be escaped and, if so, return the escape sequence. Otherwise, return the
+   * input token.
+   * 
+   * @param token
+   *          The token to escape.
+   * @return If token must be escaped, then the escaped token, otherwise the original token.
+   */
+  private String escapeToken(String token) {
+    String newToken = (String) escapeMap.get(token);
+    if (newToken == null)
+      return token;
+    return newToken;
+  }
+
+  /**
+   * Create the parse annotations in the CAS corresponding to the results of the OpenNLP parse.
+   * 
+   * @param parse
+   *          The parse generated by the OpenNLP parser.
+   * @param jCas
+   *          The JCas in which to create the annotations.
+   * @throws AnalysisEngineProcessException
+   */
+  private void makeAnnotations(Parse parse, JCas jCas) throws AnalysisEngineProcessException {
+    Span span = parse.getSpan();
+    String tag = parse.getType();
+    if (!tag.equals(ParserME.TOK_NODE)) {
+
+      // make the annotation
+      int start = offsetMap.getMapping(span.getStart());
+      int end = offsetMap.getMapping(span.getEnd());
+      Constructor annotationMaker = (Constructor) parseTagMap.get(tag);
+      if (annotationMaker != null) {
+        SyntaxAnnotation syntaxAnnot;
         try {
-            String[] mappingStrings = null;
-
-            mappingStrings = (String[]) aContext
-                    .getConfigParameterValue(MAPPINGS_PARAM);
-            if (mappingStrings == null) {
-                throw new AnnotatorConfigurationException();
-            }
-            loadMappings(mappingStrings);
-
-            String modelDirName = (String) aContext
-                    .getConfigParameterValue(MODEL_DIR_PARAM);
-
-            File modelDir = new File(modelDirName);
-            if (!modelDir.isDirectory()) {
-                throw new AnnotatorConfigurationException();
-            }
-
-            // set parameter defaults
-            boolean useTagDictionary = false;
-            boolean caseSensitiveTagDictionary = false;
-            int beamSize = ParserME.defaultBeamSize;
-            double advancePercentage = ParserME.defaultAdvancePercentage;
-
-            Boolean useTagDictP = (Boolean) aContext
-                    .getConfigParameterValue(USE_TAG_DICT_PARAM);
-            if (useTagDictP != null)
-                useTagDictionary = useTagDictP.booleanValue();
-            Boolean caseSensitiveTagDictP = (Boolean) aContext
-                    .getConfigParameterValue(CASE_INSESNITIVE_TD_PARAM);
-            if (caseSensitiveTagDictP != null)
-                caseSensitiveTagDictionary = caseSensitiveTagDictP
-                        .booleanValue();
-            Integer beamSizeInt = (Integer) aContext
-                    .getConfigParameterValue(BEAM_SIZE_PARAM);
-            if (beamSizeInt != null)
-                beamSize = beamSizeInt.intValue();
-            Float advPercentFlt = (Float) aContext
-                    .getConfigParameterValue(ADV_PERCENT_PARAM);
-            if (advPercentFlt != null)
-                advancePercentage = advPercentFlt.doubleValue();
-
-            parser = TreebankParser.getParser(modelDirName,
-                    useTagDictionary, caseSensitiveTagDictionary, beamSize,
-                    advancePercentage);
+          syntaxAnnot = (SyntaxAnnotation) annotationMaker.newInstance(new Object[] { jCas });
         } catch (Exception e) {
-            throw new AnnotatorInitializationException(e);
+          throw new AnalysisEngineProcessException(e);
         }
-        initEscapeMap();
+        syntaxAnnot.setBegin(start);
+        syntaxAnnot.setEnd(end);
+        syntaxAnnot.setComponentId(COMPONENT_NAME);
+        syntaxAnnot.addToIndexes();
+      }
+      Parse[] children = parse.getChildren();
+      for (int i = 0; i < children.length; i++) {
+        makeAnnotations(children[i], jCas);
+      }
     }
+  }
 
-    /**
-     * Processes the parse tag mappaings parameter. The constructor for each
-     * class identified in the array is loaded and stored in the mapping
-     * hashtable, using the label provided in the parameter as the key.
-     * 
-     * @param mappingStrings
-     *            Array of mapping strings of the form "tag,class"
-     * @throws AnnotatorConfigurationException
-     */
-    private void loadMappings(String[] mappingStrings)
-            throws AnnotatorConfigurationException {
-        // populate the mappings hash table (key: parse tag,CAS Annotation Type
-        // Constructor)
-        for (int i = 0; i < mappingStrings.length; i++) {
-            String[] mappingPair = mappingStrings[i].split(",");
-            if (mappingPair.length < 2)
-                throw new AnnotatorConfigurationException();
-
-            String parseTag = mappingPair[0];
-            String className = mappingPair[1];
-
-            Constructor annotationConstructor;
-            // get the name of the JCAS type with this name
-            Class annotationClass;
-            try {
-                annotationClass = Class.forName(className);
-                // get the constructor for that JCAS type
-                annotationConstructor = annotationClass
-                        .getConstructor(new Class[] { JCas.class });
-            } catch (Exception e) {
-                throw new AnnotatorConfigurationException(e);
-            }
-            parseTagMap.put(parseTag, annotationConstructor);
-        }
+  public String show(Parse parse) {
+    Span span = parse.getSpan();
+    if (parse.getType().equals(ParserME.TOK_NODE)) {
+      return (parse.getText().substring(span.getStart(), span.getEnd()));
     }
-
-    /*
-     * (non-Javadoc)
-     * 
-     * @see org.apache.uima.analysis_engine.annotator.JTextAnnotator#process(org.apache.uima.jcas.impl.JCas,
-     *      org.apache.uima.analysis_engine.ResultSpecification)
-     */
-    public void process(JCas aJCas, ResultSpecification aResultSpec)
-            throws AnnotatorProcessException {
-
-        ArrayList wordList = new ArrayList();
-        StringBuffer sentenceBuffer = new StringBuffer();
-        offsetMap.clear();
-
-        AnnotationIndex sentenceIndex = (AnnotationIndex) aJCas
-                .getJFSIndexRepository().getAnnotationIndex(Sentence.type);
-        AnnotationIndex tokenIndex = (AnnotationIndex) aJCas
-                .getJFSIndexRepository().getAnnotationIndex(Token.type);
-
-        // iterate over Sentences
-        FSIterator sentenceIterator = sentenceIndex.iterator();
-        while (sentenceIterator.hasNext()) {
-            Sentence sentence = (Sentence) sentenceIterator.next();
-
-            wordList.clear();
-            sentenceBuffer.setLength(0);
-
-            int mapIdx = 0;
-
-            // iterate over Tokens
-            FSIterator tokenIterator = tokenIndex.subiterator(sentence);
-            while (tokenIterator.hasNext()) {
-                Token token = (Token) tokenIterator.next();
-
-                String word = escapeToken(token.getCoveredText());
-
-                int start = sentenceBuffer.length();
-                int end = start + word.length();
-
-                int origIdx = token.getBegin();
-                for (mapIdx = start; mapIdx <= end; mapIdx++) {
-                    offsetMap.putMapping(mapIdx, origIdx);
-                    if (origIdx < token.getEnd())
-                        origIdx++;
-                }
-
-                sentenceBuffer.append(word + " ");
-                wordList.add(word);
-            }
-            
-            if (sentenceBuffer.length() == 0)  // check for empty sentence
-                continue;
-            
-            String sentenceText = sentenceBuffer.substring(0, sentenceBuffer
-                    .length() - 1);
-
-            Parse parse = new Parse(sentenceText, new Span(0, sentenceText
-                    .length()), "INC", 1, null);
-
-            int tokenStart = 0;
-            int tokenEnd = 0;
-            Iterator wordIterator = wordList.iterator();
-            while (wordIterator.hasNext()) {
-                String word = (String) wordIterator.next();
-                tokenEnd = tokenStart + word.length();
-                parse.insert(new Parse(sentenceText, new Span(tokenStart,
-                        tokenEnd), ParserME.TOK_NODE, 0));
-                tokenStart = tokenEnd + 1; // advance past space
-            }
-            parse = parser.parse(parse);
-
-            makeAnnotations(parse, aJCas);
-
-            // parse.show();
-            // System.out.println("");
-            // System.out.println(show(parse));
-        }
+    Parse[] children = parse.getChildren();
+    if (children.length == 1) {
+      Parse childParse = children[0];
+      if (childParse.getType().equals(ParserME.TOK_NODE)) {
+        return (show(childParse) + "/" + parse.getType());
+      }
     }
-
-    /**
-     * Initializes the table of characters that must be "escaped". These
-     * characters have special meaning to the parser, so they are replaced with
-     * a special string, which is understood by the parser to represent that
-     * character.
-     */
-    private void initEscapeMap() {
-        escapeMap.put("(", "-LRB-");
-        escapeMap.put(")", "-RRB-");
-        escapeMap.put("{", "-LCB-");
-        escapeMap.put("}", "-RCB-");
-        escapeMap.put("[", "-LSB-");
-        escapeMap.put("]", "-RSB-");
+    String retVal = "(" + parse.getType() + " ";
+    for (int i = 0; i < children.length; i++) {
+      retVal += show(children[i]) + " ";
     }
+    return (retVal + ")");
+  }
 
-    /**
-     * Escape the input token, if necessary. Consult the EscapeMap to see if the
-     * input token is a character that must be escaped and, if so, return the
-     * escape sequence. Otherwise, return the input token.
-     * 
-     * @param token
-     *            The token to escape.
-     * @return If token must be escaped, then the escaped token, otherwise the
-     *         original token.
-     */
-    private String escapeToken(String token) {
-        String newToken = (String) escapeMap.get(token);
-        if (newToken == null)
-            return token;
-        return newToken;
+  public void printParse(Parse parse, String prefix) {
+    System.out.println(prefix + "Label: " + parse.getLabel());
+    System.out.println(prefix + "Type: " + parse.getType());
+    Span span = parse.getSpan();
+    System.out.println(prefix + "Span: " + span.getStart() + ":" + span.getEnd());
+    System.out.println(prefix + "Text: "
+            + parse.getText().substring(span.getStart(), span.getEnd()));
+    Parse[] children = parse.getChildren();
+    for (int i = 0; i < children.length; i++) {
+      printParse(children[i], prefix + "  ");
     }
 
+  }
+
+  /**
+   * Private class to hold span offset mappings. When the input text contains special characters
+   * that must be escaped, the escape sequences are longer than the original text. This table keeps
+   * track of modified span offsets so that the results of the parse (performed on the
+   * length-modified text) can be mapped back to the original text spans.
+   */
+  private class OffsetMap extends ArrayList {
+
+    private static final long serialVersionUID = 1L;
+
     /**
-     * Create the parse annotations in the CAS corresponding to the results of
-     * the OpenNLP parse.
+     * Store a span mapping in the table.
      * 
-     * @param parse
-     *            The parse generated by the OpenNLP parser.
-     * @param jCas
-     *            The JCas in which to create the annotations.
-     * @throws AnnotatorProcessException
+     * @param index
+     *          The new offset.
+     * @param offset
+     *          The original offset.
      */
-    private void makeAnnotations(Parse parse, JCas jCas)
-            throws AnnotatorProcessException {
-        Span span = parse.getSpan();
-        String tag = parse.getType();
-        if (!tag.equals(ParserME.TOK_NODE)) {
-
-            // make the annotation
-            int start = offsetMap.getMapping(span.getStart());
-            int end = offsetMap.getMapping(span.getEnd());
-            Constructor annotationMaker = (Constructor) parseTagMap.get(tag);
-            if (annotationMaker != null) {
-                SyntaxAnnotation syntaxAnnot;
-                try {
-                    syntaxAnnot = (SyntaxAnnotation) annotationMaker
-                            .newInstance(new Object[] { jCas });
-                } catch (Exception e) {
-                    throw new AnnotatorProcessException(e);
-                }
-                syntaxAnnot.setBegin(start);
-                syntaxAnnot.setEnd(end);
-                syntaxAnnot.setComponentId(COMPONENT_NAME);
-                syntaxAnnot.addToIndexes();
-            }
-            Parse[] children = parse.getChildren();
-            for (int i = 0; i < children.length; i++) {
-                makeAnnotations(children[i], jCas);
-            }
-        }
-    }
-
-    public String show(Parse parse) {
-        Span span = parse.getSpan();
-        if (parse.getType().equals(ParserME.TOK_NODE)) {
-            return (parse.getText().substring(span.getStart(), span.getEnd()));
-        }
-        Parse[] children = parse.getChildren();
-        if (children.length == 1) {
-            Parse childParse = children[0];
-            if (childParse.getType().equals(ParserME.TOK_NODE)) {
-                return (show(childParse) + "/" + parse.getType());
-            }
-        }
-        String retVal = "(" + parse.getType() + " ";
-        for (int i = 0; i < children.length; i++) {
-            retVal += show(children[i]) + " ";
-        }
-        return (retVal + ")");
-    }
-
-    public void printParse(Parse parse, String prefix) {
-        System.out.println(prefix + "Label: " + parse.getLabel());
-        System.out.println(prefix + "Type: " + parse.getType());
-        Span span = parse.getSpan();
-        System.out.println(prefix + "Span: " + span.getStart() + ":"
-                + span.getEnd());
-        System.out.println(prefix + "Text: "
-                + parse.getText().substring(span.getStart(), span.getEnd()));
-        Parse[] children = parse.getChildren();
-        for (int i = 0; i < children.length; i++) {
-            printParse(children[i], prefix + "  ");
-        }
+    public void putMapping(int index, int offset) {
+      Integer element = new Integer(offset);
 
+      if (index < size()) {
+        set(index, element);
+      } else {
+        for (int i = size(); i < index; i++)
+          add(null);
+        add(element);
+      }
     }
 
     /**
-     * Private class to hold span offset mappings. When the input text contains
-     * special characters that must be escaped, the escape sequences are longer
-     * than the original text. This table keeps track of modified span offsets
-     * so that the results of the parse (performed on the length-modified text)
-     * can be mapped back to the original text spans.
+     * Retrieve a span mapping from the table.
+     * 
+     * @param index
+     *          The new offset.
+     * @return The original offset.
      */
-    private class OffsetMap extends ArrayList {
-
-        private static final long serialVersionUID = 1L;
-
-        /**
-         * Store a span mapping in the table.
-         * 
-         * @param index
-         *            The new offset.
-         * @param offset
-         *            The original offset.
-         */
-        public void putMapping(int index, int offset) {
-            Integer element = new Integer(offset);
-
-            if (index < size()) {
-                set(index, element);
-            } else {
-                for (int i = size(); i < index; i++)
-                    add(null);
-                add(element);
-            }
-        }
-
-        /**
-         * Retrieve a span mapping from the table.
-         * 
-         * @param index
-         *            The new offset.
-         * @return The original offset.
-         */
-        public int getMapping(int index) {
-            Integer element = (Integer) get(index);
-            return element.intValue();
-        }
+    public int getMapping(int index) {
+      Integer element = (Integer) get(index);
+      return element.intValue();
     }
+  }
 }

Modified: incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src/org/apache/uima/examples/opennlp/annotator/SentenceDetector.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src/org/apache/uima/examples/opennlp/annotator/SentenceDetector.java?view=diff&rev=483709&r1=477887&r2=483709
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src/org/apache/uima/examples/opennlp/annotator/SentenceDetector.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src/org/apache/uima/examples/opennlp/annotator/SentenceDetector.java Thu Dec  7 14:40:23 2006
@@ -21,69 +21,57 @@
 
 import java.io.IOException;
 
-import org.apache.uima.analysis_engine.ResultSpecification;
-import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
-import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
-import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
-import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
-import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
-import org.apache.uima.analysis_engine.annotator.JTextAnnotator_ImplBase;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.examples.opennlp.Sentence;
 import org.apache.uima.jcas.impl.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
 
 /**
- * Simple Annotator to detect sentences and create Sentence annotations in the
- * CAS. Uses the OpenNLP MaxEnt Sentence Detector.
+ * Simple Annotator to detect sentences and create Sentence annotations in the CAS. Uses the OpenNLP
+ * MaxEnt Sentence Detector.
  * 
  */
-public class SentenceDetector extends JTextAnnotator_ImplBase {
+public class SentenceDetector extends JCasAnnotator_ImplBase {
 
-    public static final String MODEL_FILE_PARAM = "ModelFile";
-    
-    public static final String COMPONENT_ID = "OpenNLP Sentence Detector";
-
-    private opennlp.tools.lang.english.SentenceDetector sentenceDetector;
-
-    /*
-     * (non-Javadoc)
-     * 
-     * @see org.apache.uima.analysis_engine.annotator.BaseAnnotator#initialize(org.apache.uima.analysis_engine.annotator.AnnotatorContext)
-     */
-    public void initialize(AnnotatorContext aContext)
-            throws AnnotatorInitializationException,
-            AnnotatorConfigurationException {
-        super.initialize(aContext);
-        String modelFile;
-        try {
-            modelFile = (String) aContext
-                    .getConfigParameterValue(MODEL_FILE_PARAM);
-        } catch (AnnotatorContextException e) {
-            throw new AnnotatorConfigurationException(e);
-        }
-        try {
-            sentenceDetector = new opennlp.tools.lang.english.SentenceDetector(modelFile);
-        } catch (IOException e) {
-            throw new AnnotatorInitializationException(e);
-        }
+  public static final String MODEL_FILE_PARAM = "ModelFile";
+
+  public static final String COMPONENT_ID = "OpenNLP Sentence Detector";
+
+  private opennlp.tools.lang.english.SentenceDetector sentenceDetector;
+
+  /**
+   * Initialize the Annotator.
+   * 
+   * @see JCasAnnotator_ImplBase#initialize(UimaContext)
+   */
+  public void initialize(UimaContext aContext) throws ResourceInitializationException {
+    super.initialize(aContext);
+    String modelFile;
+    modelFile = (String) aContext.getConfigParameterValue(MODEL_FILE_PARAM);
+    try {
+      sentenceDetector = new opennlp.tools.lang.english.SentenceDetector(modelFile);
+    } catch (IOException e) {
+      throw new ResourceInitializationException(e);
     }
+  }
 
-    /*
-     * (non-Javadoc)
-     * 
-     * @see org.apache.uima.analysis_engine.annotator.JTextAnnotator#process(org.apache.uima.jcas.impl.JCas,
-     *      org.apache.uima.analysis_engine.ResultSpecification)
-     */
-    public void process(JCas aJCas, ResultSpecification aResultSpec)
-            throws AnnotatorProcessException {
-        String docText = aJCas.getDocumentText();
-        int sentenceOffsets[] = sentenceDetector.sentPosDetect(docText);
-        int begin = 0;
-        for (int i = 0; i < sentenceOffsets.length; i++) {
-            Sentence sentence = new Sentence(aJCas, begin, sentenceOffsets[i]);
-            sentence.setComponentId(COMPONENT_ID);
-            sentence.addToIndexes();
-            begin = sentenceOffsets[i];
-        }
+  /**
+   * Process a CAS.
+   * 
+   * @see JCasAnnotator_ImplBase#process(JCas)
+   */
+  public void process(JCas aJCas) throws AnalysisEngineProcessException {
+    String docText = aJCas.getDocumentText();
+    int sentenceOffsets[] = sentenceDetector.sentPosDetect(docText);
+    int begin = 0;
+    for (int i = 0; i < sentenceOffsets.length; i++) {
+      Sentence sentence = new Sentence(aJCas, begin, sentenceOffsets[i]);
+      sentence.setComponentId(COMPONENT_ID);
+      sentence.addToIndexes();
+      begin = sentenceOffsets[i];
     }
+  }
 
 }

Modified: incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src/org/apache/uima/examples/opennlp/annotator/Tokenizer.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src/org/apache/uima/examples/opennlp/annotator/Tokenizer.java?view=diff&rev=483709&r1=477887&r2=483709
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src/org/apache/uima/examples/opennlp/annotator/Tokenizer.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-examples/src/main/opennlp_wrappers/src/org/apache/uima/examples/opennlp/annotator/Tokenizer.java Thu Dec  7 14:40:23 2006
@@ -23,90 +23,76 @@
 
 import opennlp.tools.util.Span;
 
-import org.apache.uima.analysis_engine.ResultSpecification;
-import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
-import org.apache.uima.analysis_engine.annotator.AnnotatorContext;
-import org.apache.uima.analysis_engine.annotator.AnnotatorContextException;
-import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
-import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
-import org.apache.uima.analysis_engine.annotator.JTextAnnotator_ImplBase;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.cas.text.AnnotationIndex;
 import org.apache.uima.examples.opennlp.Sentence;
 import org.apache.uima.examples.opennlp.Token;
 import org.apache.uima.jcas.impl.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
 
 /**
- * UIMA Analysis Engine that invokes the OpenNLP Tokenizer. The OpenNLP
- * Tokenizer generates a PennTreeBank style tokenization. This annotator assumes
- * that sentences have already been annotated in the CAS with Sentence
- * annotations. We iterate over sentences and invoke the OpenNLP Tokenizer on
- * each sentence. For each token, a Token annotation is created in the CAS. The
- * model file for the OpenNLP Tokenizer is specified as a parameter
- * (MODEL_FILE_PARAM).
+ * UIMA Analysis Engine that invokes the OpenNLP Tokenizer. The OpenNLP Tokenizer generates a
+ * PennTreeBank style tokenization. This annotator assumes that sentences have already been
+ * annotated in the CAS with Sentence annotations. We iterate over sentences and invoke the OpenNLP
+ * Tokenizer on each sentence. For each token, a Token annotation is created in the CAS. The model
+ * file for the OpenNLP Tokenizer is specified as a parameter (MODEL_FILE_PARAM).
  * 
  */
-public class Tokenizer extends JTextAnnotator_ImplBase {
+public class Tokenizer extends JCasAnnotator_ImplBase {
 
-    public static final String MODEL_FILE_PARAM = "ModelFile";
+  public static final String MODEL_FILE_PARAM = "ModelFile";
 
-    public static final String COMPONENT_ID = "OpenNLP Tokenizer";
+  public static final String COMPONENT_ID = "OpenNLP Tokenizer";
 
-    private opennlp.tools.lang.english.Tokenizer tokenizer;
+  private opennlp.tools.lang.english.Tokenizer tokenizer;
 
-    /*
-     * (non-Javadoc)
-     * 
-     * @see org.apache.uima.analysis_engine.annotator.BaseAnnotator#initialize(org.apache.uima.analysis_engine.annotator.AnnotatorContext)
-     */
-    public void initialize(AnnotatorContext aContext)
-            throws AnnotatorInitializationException,
-            AnnotatorConfigurationException {
-        super.initialize(aContext);
-        String modelFile;
-
-        try {
-            modelFile = (String) aContext
-                    .getConfigParameterValue(MODEL_FILE_PARAM);
-        } catch (AnnotatorContextException e) {
-            throw new AnnotatorConfigurationException(e);
-        }
-
-        try {
-            tokenizer = new opennlp.tools.lang.english.Tokenizer(modelFile);
-        } catch (IOException e) {
-            throw new AnnotatorInitializationException(e);
-        }
-    }
+  /**
+   * Initialize the Annotator.
+   * 
+   * @see JCasAnnotator_ImplBase#initialize(UimaContext)
+   */
+  public void initialize(UimaContext aContext) throws ResourceInitializationException {
+    super.initialize(aContext);
+    String modelFile;
+
+    modelFile = (String) aContext.getConfigParameterValue(MODEL_FILE_PARAM);
 
-    /*
-     * (non-Javadoc)
-     * 
-     * @see org.apache.uima.analysis_engine.annotator.JTextAnnotator#process(org.apache.uima.jcas.impl.JCas,
-     *      org.apache.uima.analysis_engine.ResultSpecification)
-     */
-    public void process(JCas aJCas, ResultSpecification aResultSpec)
-            throws AnnotatorProcessException {
-
-        AnnotationIndex sentenceIndex = (AnnotationIndex) aJCas
-                .getJFSIndexRepository().getAnnotationIndex(Sentence.type);
-
-        // iterate over Sentences
-        FSIterator sentenceIterator = sentenceIndex.iterator();
-        while (sentenceIterator.hasNext()) {
-            Sentence sentence = (Sentence) sentenceIterator.next();
-
-            String text = sentence.getCoveredText();
-            Span[] tokenSpans = tokenizer.tokenizePos(text);
-            for (int i = 0; i < tokenSpans.length; i++) {
-                Span span = tokenSpans[i];
-                Token token = new Token(aJCas);
-                token.setBegin(sentence.getBegin() + span.getStart());
-                token.setEnd(sentence.getBegin() + span.getEnd());
-                token.setComponentId(COMPONENT_ID);
-                token.addToIndexes();
-            }
-        }
+    try {
+      tokenizer = new opennlp.tools.lang.english.Tokenizer(modelFile);
+    } catch (IOException e) {
+      throw new ResourceInitializationException(e);
+    }
+  }
 
+  /**
+   * Process a CAS.
+   * 
+   * @see JCasAnnotator_ImplBase#process(JCas)
+   */
+  public void process(JCas aJCas) throws AnalysisEngineProcessException {
+
+    AnnotationIndex sentenceIndex = (AnnotationIndex) aJCas.getJFSIndexRepository()
+            .getAnnotationIndex(Sentence.type);
+
+    // iterate over Sentences
+    FSIterator sentenceIterator = sentenceIndex.iterator();
+    while (sentenceIterator.hasNext()) {
+      Sentence sentence = (Sentence) sentenceIterator.next();
+
+      String text = sentence.getCoveredText();
+      Span[] tokenSpans = tokenizer.tokenizePos(text);
+      for (int i = 0; i < tokenSpans.length; i++) {
+        Span span = tokenSpans[i];
+        Token token = new Token(aJCas);
+        token.setBegin(sentence.getBegin() + span.getStart());
+        token.setEnd(sentence.getBegin() + span.getEnd());
+        token.setComponentId(COMPONENT_ID);
+        token.addToIndexes();
+      }
     }
+
+  }
 }