You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/11/07 20:07:50 UTC
svn commit: r1198880 [5/7] - in /incubator/opennlp/trunk: ./ opennlp-distr/ opennlp-distr/src/main/assembly/ opennlp-distr/src/main/readme/ opennlp-docs/ opennlp-docs/src/docbkx/ opennlp-docs/src/docbkx/css/ opennlp-docs/src/main/resources/xsl/ opennlp...

Modified: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java?rev=1198880&r1=1198879&r2=1198880&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java (original)
+++ incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java Mon Nov  7 19:07:33 2011
@@ -1,346 +1,346 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreemnets.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ 
-
-package opennlp.uima.parser;
-
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
-import opennlp.tools.parser.Parse;
-import opennlp.tools.parser.ParserFactory;
-import opennlp.tools.parser.ParserModel;
-import opennlp.tools.util.Span;
-import opennlp.uima.util.AnnotatorUtil;
-import opennlp.uima.util.ContainingConstraint;
-import opennlp.uima.util.UimaUtil;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.resource.ResourceAccessException;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-
-/**
- * Abstract base class for OpenNLP Parser annotators.
- * <p>
- * Mandatory parameters
- * <table border=1>
- *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- *   <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.ParseType</td> <td>The full name of the parse type</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.TypeFeature</td> <td>The name of the type feature</td></tr>
- * </table>
- * <p>
- * Optional parameters
- * <table border=1>
- *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- *   <tr><td>Integer</td> <td>opennlp.uima.BeamSize</td></tr>
- * </table>
- */
-public class Parser extends CasAnnotator_ImplBase {
- 
-  private static class ParseConverter {
-    private Map<Integer, Integer> mIndexMap = new HashMap<Integer, Integer>();
-    
-    private Parse mParseForTagger;
-    
-    private String mSentence;
-    
-    /**
-     * Initializes a new instance.
-     * 
-     * @param sentence
-     * @param tokens
-     */
-    public ParseConverter(String sentence, Span tokens[]) {
-      
-      mSentence = sentence;
-      
-      StringBuilder sentenceStringBuilder = new StringBuilder();
-      
-      String tokenList[] = new String[tokens.length];
-      
-      for (int i = 0; i < tokens.length; i++) {
-        String tokenString = tokens[i].getCoveredText(sentence).toString();
-        String escapedToken = escape(tokenString);
-        tokenList[i] = escapedToken;
-
-        int escapedStart = sentenceStringBuilder.length();
-        int start = tokens[i].getStart();
-        mIndexMap.put(new Integer(escapedStart), new Integer(start));
-
-        int escapedEnd = escapedStart + escapedToken.length();
-        int end = tokens[i].getEnd();
-        mIndexMap.put(new Integer(escapedEnd), new Integer(end));
-
-        sentenceStringBuilder.append(tokenList[i]);
-
-        sentenceStringBuilder.append(' ');
-      }
-      
-      // remove last space
-      sentenceStringBuilder.setLength(sentenceStringBuilder.length() - 1);
-      
-      String tokenizedSentence = sentenceStringBuilder.toString();
-      
-      mParseForTagger = new Parse(tokenizedSentence, 
-          new Span(0, tokenizedSentence.length()), "INC", 1, null);
-      
-      int start = 0;
-      
-      for (int i = 0; i < tokenList.length; i++) {
-
-        mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start,
-            start + tokenList[i].length()),
-            opennlp.tools.parser.chunking.Parser.TOK_NODE, 0f, 0));
-
-        start += tokenList[i].length() + 1;
-      }
-    }
-    
-    private static String escape(String text) {
-      return text;
-    }
-    
-    /**
-     * Creates the parse for the tagger.
-     *  
-     * @return the parse which can be passed to the tagger
-     */
-    Parse getParseForTagger() {
-      return mParseForTagger;
-    }
-    
-    /**
-     * Converts the parse from the tagger back.
-     * 
-     * @param parseFromTagger
-     * @return the final parse
-     */
-    Parse transformParseFromTagger(Parse parseFromTagger) {
-      int start = parseFromTagger.getSpan().getStart();
-      int end = parseFromTagger.getSpan().getEnd();
-      
-      
-      Parse transformedParse = new Parse(mSentence, 
-          new Span(((Integer) mIndexMap.get(new Integer(start))).intValue(), 
-          ((Integer) mIndexMap.get(new Integer(end))).intValue()), 
-          parseFromTagger.getType(), 
-          parseFromTagger.getProb(), parseFromTagger.getHeadIndex());
-      
-      
-      Parse[] parseFromTaggerChildrens = parseFromTagger.getChildren();
-      
-      // call this method for all childs ... 
-      for (int i = 0; i < parseFromTaggerChildrens.length; i++) {
-        
-        Parse child = parseFromTaggerChildrens[i];
-        
-        if (!child.getType().equals(
-            opennlp.tools.parser.chunking.Parser.TOK_NODE)) {
-        
-          // only insert if it has childs
-          if (child.getChildCount() > 0 && 
-              !child.getChildren()[0].getType().equals(opennlp.tools.parser.chunking.Parser.TOK_NODE)) {
-            transformedParse.insert(transformParseFromTagger(child));
-          }
-        }
-      }
-      
-      if (parseFromTagger.getType().equals("TOP")) {
-        return transformedParse.getChildren()[0];
-      }
-      else {
-        return transformedParse;
-      }
-    }
-    
-  }
-
-  private static final String PARSE_TYPE_PARAMETER = "opennlp.uima.ParseType";
-
-  public static final String TYPE_FEATURE_PARAMETER = 
-      "opennlp.uima.TypeFeature";
-  
-  protected UimaContext context;
-  
-  protected Logger mLogger;
-
-  private Type mSentenceType;
-
-  private Type mTokenType;
-
-  protected opennlp.tools.parser.Parser mParser;
-
-  private Type mParseType;
-
-  private Feature mTypeFeature;
-
-  /**
-   * Initializes the current instance with the given context.
-   */
-  public void initialize(UimaContext context)
-      throws ResourceInitializationException {
-
-    super.initialize(context);
-
-    this.context = context;
-
-    mLogger = context.getLogger();
-
-    if (mLogger.isLoggable(Level.INFO)) {
-      mLogger.log(Level.INFO, "Initializing the OpenNLP Parser.");
-    }
-
-    ParserModel model;
-
-    try {
-      ParserModelResource modelResource = (ParserModelResource) context
-          .getResourceObject(UimaUtil.MODEL_PARAMETER);
-
-      model = modelResource.getModel();
-    } catch (ResourceAccessException e) {
-      throw new ResourceInitializationException(e);
-    }
-
-    mParser = ParserFactory.create(model);
-  }
-  
-  /**
-   * Initializes the type system.
-   */
-  public void typeSystemInit(TypeSystem typeSystem)
-      throws AnalysisEngineProcessException {
-
-    mSentenceType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
-        UimaUtil.SENTENCE_TYPE_PARAMETER);
-
-    mTokenType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
-        UimaUtil.TOKEN_TYPE_PARAMETER);
-
-    mParseType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
-        PARSE_TYPE_PARAMETER);
-
-    mTypeFeature = AnnotatorUtil.getRequiredFeatureParameter(context,
-        mParseType, TYPE_FEATURE_PARAMETER, CAS.TYPE_NAME_STRING);
-  }
-  
-  /**
-   * Performs parsing on the given {@link CAS} object.
-   */
-  public void process(CAS cas) {
-    FSIndex<AnnotationFS> sentences = cas.getAnnotationIndex(mSentenceType);
-
-    Iterator<AnnotationFS> sentencesIterator = sentences.iterator();
-
-    while (sentencesIterator.hasNext()) {
-      AnnotationFS sentence = (AnnotationFS) sentencesIterator.next();
-
-      process(cas, sentence);
-    }
-  }
-  
-  protected void process(CAS cas, AnnotationFS sentenceAnnotation) {
-    FSIndex<AnnotationFS> allTokens = cas.getAnnotationIndex(mTokenType);
-    
-    ContainingConstraint containingConstraint = 
-        new ContainingConstraint(sentenceAnnotation);
-    
-    Iterator<AnnotationFS> containingTokens = cas.createFilteredIterator(
-        allTokens.iterator(), containingConstraint);
-  
-    StringBuilder sentenceStringBuilder = new StringBuilder();
-    
-    while (containingTokens.hasNext()) {
-      AnnotationFS token = (AnnotationFS) containingTokens.next();
-
-      sentenceStringBuilder.append(token.getCoveredText());
-
-      // attention the offsets moves inside the sentence...
-      sentenceStringBuilder.append(' ');
-    }
-     
-    String sentence = sentenceStringBuilder.toString();
-    sentence = sentenceAnnotation.getCoveredText();
-
-    containingTokens = cas.createFilteredIterator(
-        allTokens.iterator(), containingConstraint);
-   
-    List<Span> tokenSpans = new LinkedList<Span>();
-    
-    while(containingTokens.hasNext()) {
-      AnnotationFS token = (AnnotationFS) containingTokens.next();
-
-      tokenSpans.add(new Span(token.getBegin() - sentenceAnnotation.getBegin(), 
-          token.getEnd() - sentenceAnnotation.getBegin()));
-    }
-    
-    ParseConverter converter = new ParseConverter(sentence,(Span[]) 
-        tokenSpans.toArray(new Span[tokenSpans.size()]));
-    
-   Parse parse = mParser.parse(converter.getParseForTagger());
-  
-   parse = converter.transformParseFromTagger(parse);
-   
-   if (mLogger.isLoggable(Level.INFO)) {
-     StringBuffer parseString = new StringBuffer();
-     parse.show(parseString);
-     
-     mLogger.log(Level.INFO, parseString.toString());
-   }
-   
-   createAnnotation(cas, sentenceAnnotation.getBegin(), parse);
-  }
-  
-  protected void createAnnotation(CAS cas, int offset, Parse parse) {
-    
-    Parse parseChildrens[] = parse.getChildren();
-    
-    // do this for all children
-    for (int i = 0; i < parseChildrens.length; i++) {
-      Parse child = parseChildrens[i];
-      createAnnotation(cas, offset, child);
-    }
-    
-    AnnotationFS parseAnnotation = cas.createAnnotation(mParseType, offset + 
-        parse.getSpan().getStart(), offset + parse.getSpan().getEnd());
-    
-    parseAnnotation.setStringValue(mTypeFeature, parse.getType());
-    
-    cas.getIndexRepository().addFS(parseAnnotation);
-  }
-
-  /**
-   * Releases allocated resources.
-   */
-  public void destroy() {
-    mParser = null;
-  }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ 
+
+package opennlp.uima.parser;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import opennlp.tools.parser.Parse;
+import opennlp.tools.parser.ParserFactory;
+import opennlp.tools.parser.ParserModel;
+import opennlp.tools.util.Span;
+import opennlp.uima.util.AnnotatorUtil;
+import opennlp.uima.util.ContainingConstraint;
+import opennlp.uima.util.UimaUtil;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIndex;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.resource.ResourceAccessException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+
+/**
+ * OpenNLP Parser annotator; it can also serve as a base class for parser annotators.
+ * <p>
+ * Mandatory parameters
+ * <table border=1>
+ *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.ParseType</td> <td>The full name of the parse type</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.TypeFeature</td> <td>The name of the type feature</td></tr>
+ * </table>
+ * <p>
+ * Optional parameters
+ * <table border=1>
+ *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ *   <tr><td>Integer</td> <td>opennlp.uima.BeamSize</td></tr>
+ * </table>
+ */
+public class Parser extends CasAnnotator_ImplBase {
+ 
+  private static class ParseConverter {
+    private Map<Integer, Integer> mIndexMap = new HashMap<Integer, Integer>();
+    
+    private Parse mParseForTagger;
+    
+    private String mSentence;
+    
+    /**
+     * Builds the parser input for one sentence: the token texts are joined
+     * with single spaces, and every token boundary in the joined string is
+     * mapped back to its offset in the original sentence.
+     *
+     * @param sentence the text the token spans refer to
+     * @param tokens the token spans inside <code>sentence</code>
+     */
+    public ParseConverter(String sentence, Span tokens[]) {
+      mSentence = sentence;
+
+      StringBuilder sentenceStringBuilder = new StringBuilder();
+      String tokenList[] = new String[tokens.length];
+
+      for (int i = 0; i < tokens.length; i++) {
+        String escapedToken =
+            escape(tokens[i].getCoveredText(sentence).toString());
+        tokenList[i] = escapedToken;
+
+        // offsets of this token in the space-joined string ...
+        int escapedStart = sentenceStringBuilder.length();
+        int escapedEnd = escapedStart + escapedToken.length();
+
+        // ... mapped to its offsets in the original sentence
+        mIndexMap.put(Integer.valueOf(escapedStart),
+            Integer.valueOf(tokens[i].getStart()));
+        mIndexMap.put(Integer.valueOf(escapedEnd),
+            Integer.valueOf(tokens[i].getEnd()));
+
+        sentenceStringBuilder.append(escapedToken).append(' ');
+      }
+
+      // remove last space (the builder is empty if there are no tokens)
+      if (sentenceStringBuilder.length() > 0) {
+        sentenceStringBuilder.setLength(sentenceStringBuilder.length() - 1);
+      }
+
+      String tokenizedSentence = sentenceStringBuilder.toString();
+
+      mParseForTagger = new Parse(tokenizedSentence,
+          new Span(0, tokenizedSentence.length()), "INC", 1, null);
+
+      // one TOK_NODE child per token, positioned in the joined string
+      int start = 0;
+      for (int i = 0; i < tokenList.length; i++) {
+        mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start,
+            start + tokenList[i].length()),
+            opennlp.tools.parser.chunking.Parser.TOK_NODE, 0f, 0));
+        start += tokenList[i].length() + 1;
+      }
+    }
+    
+    private static String escape(String text) {
+      return text; // no escaping is applied; the text is returned as is
+    }
+    
+    /**
+     * Returns the parse that was built in the constructor.
+     *
+     * @return the parse which can be passed to the tagger
+     */
+    Parse getParseForTagger() {
+      return mParseForTagger;
+    }
+    
+    /**
+     * Converts a parse from the tagger back to the offsets of the original
+     * sentence, recursing into the children.
+     * <p>
+     * Token nodes, and nodes whose first child is a token node, are left out
+     * of the result. For a "TOP" node its first converted child is returned;
+     * if nothing was inserted below it, the "TOP" parse itself is returned.
+     *
+     * @param parseFromTagger a parse over the space-joined token string
+     * @return the final parse
+     */
+    Parse transformParseFromTagger(Parse parseFromTagger) {
+      final String tokNode = opennlp.tools.parser.chunking.Parser.TOK_NODE;
+
+      // the span boundaries are looked up in the index map that the
+      // constructor filled with the token boundaries
+      Span taggerSpan = parseFromTagger.getSpan();
+      int start =
+          mIndexMap.get(Integer.valueOf(taggerSpan.getStart())).intValue();
+      int end =
+          mIndexMap.get(Integer.valueOf(taggerSpan.getEnd())).intValue();
+
+      Parse transformedParse = new Parse(mSentence, new Span(start, end),
+          parseFromTagger.getType(),
+          parseFromTagger.getProb(), parseFromTagger.getHeadIndex());
+
+      for (Parse child : parseFromTagger.getChildren()) {
+        // skip token nodes and nodes whose first child is a token node
+        if (!child.getType().equals(tokNode)
+            && child.getChildCount() > 0
+            && !child.getChildren()[0].getType().equals(tokNode)) {
+          transformedParse.insert(transformParseFromTagger(child));
+        }
+      }
+
+      // unwrap the TOP node, unless nothing was inserted below it
+      if (parseFromTagger.getType().equals("TOP")
+          && transformedParse.getChildCount() > 0) {
+        return transformedParse.getChildren()[0];
+      }
+
+      return transformedParse;
+    }
+    
+  }
+
+  private static final String PARSE_TYPE_PARAMETER = "opennlp.uima.ParseType";
+
+  public static final String TYPE_FEATURE_PARAMETER = 
+      "opennlp.uima.TypeFeature";
+  
+  protected UimaContext context;
+  
+  protected Logger mLogger;
+
+  private Type mSentenceType;
+
+  private Type mTokenType;
+
+  protected opennlp.tools.parser.Parser mParser;
+
+  private Type mParseType;
+
+  private Feature mTypeFeature;
+
+  /**
+   * Initializes the current instance with the given context: stores the
+   * context and its logger, and creates the parser from the model resource.
+   *
+   * @param context the UIMA context of this annotator
+   * @throws ResourceInitializationException if accessing the model
+   *     resource fails
+   */
+  public void initialize(UimaContext context)
+      throws ResourceInitializationException {
+    super.initialize(context);
+
+    this.context = context;
+    mLogger = context.getLogger();
+
+    if (mLogger.isLoggable(Level.INFO)) {
+      mLogger.log(Level.INFO, "Initializing the OpenNLP Parser.");
+    }
+
+    final ParserModel parserModel;
+    try {
+      Object resource = context.getResourceObject(UimaUtil.MODEL_PARAMETER);
+      parserModel = ((ParserModelResource) resource).getModel();
+    } catch (ResourceAccessException accessFailure) {
+      throw new ResourceInitializationException(accessFailure);
+    }
+
+    mParser = ParserFactory.create(parserModel);
+  }
+  
+  /**
+   * Initializes the type system: looks up the sentence, token and parse
+   * types and the type feature of the parse type via AnnotatorUtil.
+   */
+  public void typeSystemInit(TypeSystem typeSystem)
+      throws AnalysisEngineProcessException {
+    mSentenceType = AnnotatorUtil.getRequiredTypeParameter(
+        context, typeSystem, UimaUtil.SENTENCE_TYPE_PARAMETER);
+
+    mTokenType = AnnotatorUtil.getRequiredTypeParameter(
+        context, typeSystem, UimaUtil.TOKEN_TYPE_PARAMETER);
+
+    mParseType = AnnotatorUtil.getRequiredTypeParameter(
+        context, typeSystem, PARSE_TYPE_PARAMETER);
+    // the feature is looked up on the parse type resolved just above
+    mTypeFeature = AnnotatorUtil.getRequiredFeatureParameter(
+        context, mParseType, TYPE_FEATURE_PARAMETER, CAS.TYPE_NAME_STRING);
+  }
+  
+  /**
+   * Performs parsing on the given {@link CAS} object by processing every
+   * annotation in the index of the sentence type.
+   *
+   * @param cas the CAS whose sentences are parsed
+   */
+  public void process(CAS cas) {
+    FSIndex<AnnotationFS> sentenceIndex =
+        cas.getAnnotationIndex(mSentenceType);
+
+    for (Iterator<AnnotationFS> it = sentenceIndex.iterator(); it.hasNext();) {
+      process(cas, it.next());
+    }
+  }
+  
+  /**
+   * Parses a single sentence and adds the resulting parse annotations to
+   * the CAS. A sentence that contains no tokens is skipped.
+   *
+   * @param cas the CAS to read the tokens from and to add annotations to
+   * @param sentenceAnnotation the sentence to parse
+   */
+  protected void process(CAS cas, AnnotationFS sentenceAnnotation) {
+    FSIndex<AnnotationFS> allTokens = cas.getAnnotationIndex(mTokenType);
+
+    ContainingConstraint containingConstraint =
+        new ContainingConstraint(sentenceAnnotation);
+
+    Iterator<AnnotationFS> containingTokens = cas.createFilteredIterator(
+        allTokens.iterator(), containingConstraint);
+
+    // token offsets are made relative to the start of the sentence
+    int sentenceBegin = sentenceAnnotation.getBegin();
+
+    List<Span> tokenSpans = new LinkedList<Span>();
+
+    while (containingTokens.hasNext()) {
+      AnnotationFS token = containingTokens.next();
+
+      tokenSpans.add(new Span(token.getBegin() - sentenceBegin,
+          token.getEnd() - sentenceBegin));
+    }
+
+    // a sentence without tokens has nothing to parse
+    if (tokenSpans.isEmpty()) {
+      return;
+    }
+
+    ParseConverter converter = new ParseConverter(
+        sentenceAnnotation.getCoveredText(),
+        tokenSpans.toArray(new Span[tokenSpans.size()]));
+
+    Parse parse = mParser.parse(converter.getParseForTagger());
+
+    parse = converter.transformParseFromTagger(parse);
+
+    if (mLogger.isLoggable(Level.INFO)) {
+      // the parse is rendered into a StringBuffer by Parse.show
+      StringBuffer parseString = new StringBuffer();
+      parse.show(parseString);
+
+      mLogger.log(Level.INFO, parseString.toString());
+    }
+
+    createAnnotation(cas, sentenceBegin, parse);
+  }
+  
+  /**
+   * Adds an annotation for the given parse and, before that, for each of
+   * its descendants; the parse type is stored in the type feature.
+   * @param offset the amount added to the span offsets of the parse
+   */
+  protected void createAnnotation(CAS cas, int offset, Parse parse) {
+    // children first, so their annotations are added before the parent's
+    for (Parse child : parse.getChildren()) {
+      createAnnotation(cas, offset, child);
+    }
+
+    Span span = parse.getSpan();
+    AnnotationFS parseAnnotation = cas.createAnnotation(mParseType,
+        offset + span.getStart(), offset + span.getEnd());
+    parseAnnotation.setStringValue(mTypeFeature, parse.getType());
+    cas.getIndexRepository().addFS(parseAnnotation);
+  }
+
+  /**
+   * Releases allocated resources by dropping the reference to the parser.
+   */
+  public void destroy() {
+    mParser = null;
+  }
+}

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/ParserModelResource.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/ParserModelResourceImpl.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResource.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResourceImpl.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java?rev=1198880&r1=1198879&r2=1198880&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java (original)
+++ incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java Mon Nov  7 19:07:33 2011
@@ -1,242 +1,242 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreemnets.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ 
-
-package opennlp.uima.postag;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import opennlp.maxent.GIS;
-import opennlp.tools.postag.POSDictionary;
-import opennlp.tools.postag.POSModel;
-import opennlp.tools.postag.POSSample;
-import opennlp.tools.postag.POSTaggerME;
-import opennlp.tools.util.ObjectStreamUtils;
-import opennlp.tools.util.model.ModelType;
-import opennlp.uima.util.AnnotatorUtil;
-import opennlp.uima.util.CasConsumerUtil;
-import opennlp.uima.util.ContainingConstraint;
-import opennlp.uima.util.OpennlpUtil;
-import opennlp.uima.util.UimaUtil;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
-/**
- * OpenNLP POSTagger trainer.
- * <p>
- * Mandatory parameters
- * <table border=1>
- *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- *   <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
- *   <tr><td>String</td> <td>pennlp.uima.POSFeature</td> <td>The name of the token pos feature,
- *       the feature must be of type String</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.TagDictionaryName</td></tr>
- * </table>
- */
-public class POSTaggerTrainer extends CasConsumer_ImplBase {
-
-  public static final String TAG_DICTIONARY_NAME = "opennlp.uima.TagDictionaryName";
-  
-  private UimaContext mContext;
-
-  private Type mSentenceType;
-
-  private Type mTokenType;
-
-  private String mModelName;
-
-  private Feature mPOSFeature;
-  
-  private Logger mLogger;
-  
-  private List<POSSample> mPOSSamples = new ArrayList<POSSample>();
-  
-  private String language;
-  
-  private POSDictionary tagDictionary;
-  
-  /**
-   * Initializes the current instance.
-   */
-  public void initialize() throws ResourceInitializationException {
-    
-    super.initialize();
-    
-    mContext = getUimaContext();
-    
-    mLogger = mContext.getLogger();
-    
-    if (mLogger.isLoggable(Level.INFO)) {
-      mLogger.log(Level.INFO, "Initializing the OpenNLP " +
-          "POSTagger trainer.");
-    } 
-    
-    mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.MODEL_PARAMETER);
-    
-    language = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.LANGUAGE_PARAMETER);
-    
-    String tagDictionaryName = CasConsumerUtil.getOptionalStringParameter(mContext,
-        TAG_DICTIONARY_NAME);
-
-    if (tagDictionaryName != null) {
-      try {
-        InputStream dictIn = AnnotatorUtil.getResourceAsStream(mContext, tagDictionaryName);
-
-        // TODO: ask Tom if case sensitivity must be configureable
-        tagDictionary = new POSDictionary(new BufferedReader(new InputStreamReader(dictIn)), false);
-
-      } catch (final IOException e) {
-        // if this fails just print error message and continue
-        final String message = "IOException during tag dictionary reading, "
-            + "running without tag dictionary: " + e.getMessage();
-
-        if (this.mLogger.isLoggable(Level.WARNING)) {
-          this.mLogger.log(Level.WARNING, message);
-        }
-      }
-    }
-  }  
-  
-  /**
-   * Initialize the current instance with the given type system.
-   */
-  public void typeSystemInit(TypeSystem typeSystem) 
-      throws ResourceInitializationException {
-    String sentenceTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.SENTENCE_TYPE_PARAMETER);
-    
-    if (mLogger.isLoggable(Level.INFO)) {
-      mLogger.log(Level.INFO, UimaUtil.SENTENCE_TYPE_PARAMETER + ": " +
-          sentenceTypeName);
-    }
-
-    mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
-
-    String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.TOKEN_TYPE_PARAMETER);
-    
-    mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
-    
-    String posFeatureName = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.POS_FEATURE_PARAMETER);
-    
-    mPOSFeature = mTokenType.getFeatureByBaseName(posFeatureName);
-  }
-  
-  /**
-   * Process the given CAS object.
-   */
-  public void processCas(CAS cas) {
-
-    FSIndex<AnnotationFS> sentenceAnnotations = cas.getAnnotationIndex(mSentenceType);
-
-    Iterator<AnnotationFS> sentenceAnnotationsIterator = sentenceAnnotations.iterator();
-
-    while (sentenceAnnotationsIterator.hasNext()) {
-
-      AnnotationFS sentence = 
-        (AnnotationFS) sentenceAnnotationsIterator.next();
-
-      process(cas, sentence);
-    }
-  }
-  
-  private void process(CAS tcas, AnnotationFS sentence) {
-    
-    FSIndex<AnnotationFS> allTokens = tcas.getAnnotationIndex(mTokenType);
-
-    ContainingConstraint containingConstraint = 
-        new ContainingConstraint(sentence);
-    
-    List<String> tokens = new ArrayList<String>();
-    List<String> tags = new ArrayList<String>();
-    
-    Iterator<AnnotationFS> containingTokens = tcas.createFilteredIterator(
-        allTokens.iterator(), containingConstraint);
-    
-    while (containingTokens.hasNext()) {
-      
-      AnnotationFS tokenAnnotation = (AnnotationFS) containingTokens.next();
-      
-      String tag = tokenAnnotation.getFeatureValueAsString(mPOSFeature);
-      
-      tokens.add(tokenAnnotation.getCoveredText().trim());
-      tags.add(tag);
-    }
-    
-    mPOSSamples.add(new POSSample(tokens, tags));
-  }
-  
-  /**
-   * Called if the processing is finished, this method
-   * does the training.
-   */
-  public void collectionProcessComplete(ProcessTrace trace) 
-      throws ResourceProcessException, IOException {
-    
-    GIS.PRINT_MESSAGES = false;
-
-    POSModel posTaggerModel = POSTaggerME.train(language, 
-        ObjectStreamUtils.createObjectStream(mPOSSamples),
-        ModelType.MAXENT, tagDictionary, null, 100, 5);
-    
-    // dereference to allow garbage collection
-    mPOSSamples = null;
-    
-    File modelFile = new File(getUimaContextAdmin().getResourceManager()
-        .getDataPath() + File.separatorChar + mModelName);
-
-    OpennlpUtil.serialize(posTaggerModel, modelFile);
-  }
-  
-  /**
-   * The trainer is not stateless.
-   */
-  public boolean isStateless() {
-    return false;
-  }
-  
-  /**
-   * Releases allocated resources.
-   */
-  public void destroy() {
-   // dereference to allow garbage collection
-    mPOSSamples = null;
-  }
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ 
+
+package opennlp.uima.postag;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import opennlp.maxent.GIS;
+import opennlp.tools.postag.POSDictionary;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.postag.POSTaggerME;
+import opennlp.tools.util.ObjectStreamUtils;
+import opennlp.tools.util.model.ModelType;
+import opennlp.uima.util.AnnotatorUtil;
+import opennlp.uima.util.CasConsumerUtil;
+import opennlp.uima.util.ContainingConstraint;
+import opennlp.uima.util.OpennlpUtil;
+import opennlp.uima.util.UimaUtil;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIndex;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.ProcessTrace;
+
+/**
+ * OpenNLP POSTagger trainer.
+ * <p>
+ * Mandatory parameters
+ * <table border=1>
+ *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.POSFeature</td> <td>The name of the token pos feature,
+ *       the feature must be of type String</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.TagDictionaryName</td> <td>The name of the tag dictionary resource (optional)</td></tr>
+ * </table>
+ */
+public class POSTaggerTrainer extends CasConsumer_ImplBase {
+
+  public static final String TAG_DICTIONARY_NAME = "opennlp.uima.TagDictionaryName";
+  
+  private UimaContext mContext;
+
+  private Type mSentenceType;
+
+  private Type mTokenType;
+
+  private String mModelName;
+
+  private Feature mPOSFeature;
+  
+  private Logger mLogger;
+  
+  private List<POSSample> mPOSSamples = new ArrayList<POSSample>();
+  
+  private String language;
+  
+  private POSDictionary tagDictionary;
+  
+  /**
+   * Initializes the current instance.
+   */
+  public void initialize() throws ResourceInitializationException {
+    
+    super.initialize();
+    
+    mContext = getUimaContext();
+    
+    mLogger = mContext.getLogger();
+    
+    if (mLogger.isLoggable(Level.INFO)) {
+      mLogger.log(Level.INFO, "Initializing the OpenNLP " +
+          "POSTagger trainer.");
+    } 
+    
+    mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
+        UimaUtil.MODEL_PARAMETER);
+    
+    language = CasConsumerUtil.getRequiredStringParameter(mContext,
+        UimaUtil.LANGUAGE_PARAMETER);
+    
+    String tagDictionaryName = CasConsumerUtil.getOptionalStringParameter(mContext,
+        TAG_DICTIONARY_NAME);
+
+    if (tagDictionaryName != null) {
+      try {
+        InputStream dictIn = AnnotatorUtil.getResourceAsStream(mContext, tagDictionaryName);
+
+        // TODO: ask Tom if case sensitivity must be configurable
+        tagDictionary = new POSDictionary(new BufferedReader(new InputStreamReader(dictIn)), false);
+
+      } catch (final IOException e) {
+        // if this fails just print error message and continue
+        final String message = "IOException during tag dictionary reading, "
+            + "running without tag dictionary: " + e.getMessage();
+
+        if (this.mLogger.isLoggable(Level.WARNING)) {
+          this.mLogger.log(Level.WARNING, message);
+        }
+      }
+    }
+  }  
+  
+  /**
+   * Initialize the current instance with the given type system.
+   */
+  public void typeSystemInit(TypeSystem typeSystem) 
+      throws ResourceInitializationException {
+    String sentenceTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
+        UimaUtil.SENTENCE_TYPE_PARAMETER);
+    
+    if (mLogger.isLoggable(Level.INFO)) {
+      mLogger.log(Level.INFO, UimaUtil.SENTENCE_TYPE_PARAMETER + ": " +
+          sentenceTypeName);
+    }
+
+    mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
+
+    String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
+        UimaUtil.TOKEN_TYPE_PARAMETER);
+    
+    mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
+    
+    String posFeatureName = CasConsumerUtil.getRequiredStringParameter(mContext,
+        UimaUtil.POS_FEATURE_PARAMETER);
+    
+    mPOSFeature = mTokenType.getFeatureByBaseName(posFeatureName);
+  }
+  
+  /**
+   * Process the given CAS object.
+   */
+  public void processCas(CAS cas) {
+
+    FSIndex<AnnotationFS> sentenceAnnotations = cas.getAnnotationIndex(mSentenceType);
+
+    Iterator<AnnotationFS> sentenceAnnotationsIterator = sentenceAnnotations.iterator();
+
+    while (sentenceAnnotationsIterator.hasNext()) {
+
+      AnnotationFS sentence = 
+        (AnnotationFS) sentenceAnnotationsIterator.next();
+
+      process(cas, sentence);
+    }
+  }
+  
+  private void process(CAS tcas, AnnotationFS sentence) {
+    
+    FSIndex<AnnotationFS> allTokens = tcas.getAnnotationIndex(mTokenType);
+
+    ContainingConstraint containingConstraint = 
+        new ContainingConstraint(sentence);
+    
+    List<String> tokens = new ArrayList<String>();
+    List<String> tags = new ArrayList<String>();
+    
+    Iterator<AnnotationFS> containingTokens = tcas.createFilteredIterator(
+        allTokens.iterator(), containingConstraint);
+    
+    while (containingTokens.hasNext()) {
+      
+      AnnotationFS tokenAnnotation = (AnnotationFS) containingTokens.next();
+      
+      String tag = tokenAnnotation.getFeatureValueAsString(mPOSFeature);
+      
+      tokens.add(tokenAnnotation.getCoveredText().trim());
+      tags.add(tag);
+    }
+    
+    mPOSSamples.add(new POSSample(tokens, tags));
+  }
+  
+  /**
+   * Called when the processing is finished; this method
+   * does the training.
+   */
+  public void collectionProcessComplete(ProcessTrace trace) 
+      throws ResourceProcessException, IOException {
+    
+    GIS.PRINT_MESSAGES = false;
+
+    POSModel posTaggerModel = POSTaggerME.train(language, 
+        ObjectStreamUtils.createObjectStream(mPOSSamples),
+        ModelType.MAXENT, tagDictionary, null, 100, 5);
+    
+    // dereference to allow garbage collection
+    mPOSSamples = null;
+    
+    File modelFile = new File(getUimaContextAdmin().getResourceManager()
+        .getDataPath() + File.separatorChar + mModelName);
+
+    OpennlpUtil.serialize(posTaggerModel, modelFile);
+  }
+  
+  /**
+   * The trainer is not stateless.
+   */
+  public boolean isStateless() {
+    return false;
+  }
+  
+  /**
+   * Releases allocated resources.
+   */
+  public void destroy() {
+   // dereference to allow garbage collection
+    mPOSSamples = null;
+  }
 }
\ No newline at end of file

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java?rev=1198880&r1=1198879&r2=1198880&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java (original)
+++ incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java Mon Nov  7 19:07:33 2011
@@ -1,133 +1,133 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreemnets.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ 
-
-package opennlp.uima.sentdetect;
-
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.util.Span;
-import opennlp.uima.util.AnnotatorUtil;
-import opennlp.uima.util.UimaUtil;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.resource.ResourceAccessException;
-import org.apache.uima.resource.ResourceInitializationException;
-
-/**
- * OpenNLP Sentence annotator.
- * <p>
- * Mandatory parameters
- * <table border=1>
- *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- *   <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- * </table>
- * <p>  
- * Optional parameters
- * <table border=1>
- *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * 	 <tr><td>String</td> <td>opennlp.uima.ContainerType</td> <td>The name of the container type</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.ProbabilityFeature</td> <td>The name of the double
- *       probability feature (not set by default)</td></tr>
- * </table>
- */
-public final class SentenceDetector extends AbstractSentenceDetector {
-
-  /**
-   * OpenNLP sentence detector.
-   */
-  private SentenceDetectorME sentenceDetector;
-
-  private Feature probabilityFeature;
-  
-  /**
-   * Initializes a new instance.
-   *
-   * Note: Use {@link #initialize(UimaContext) } to initialize 
-   * this instance. Not use the constructor.
-   */
-  public SentenceDetector() {
-    // must not be implemented !
-  }
-  
-  /**
-   * Initializes the current instance with the given context.
-   * 
-   * Note: Do all initialization in this method, do not use the constructor.
-   */
-  public void initialize(UimaContext context)
-      throws ResourceInitializationException {
-
-    super.initialize(context);
-
-    SentenceModel model;
-
-    try {
-      SentenceModelResource modelResource = (SentenceModelResource) context
-          .getResourceObject(UimaUtil.MODEL_PARAMETER);
-
-      model = modelResource.getModel();
-    } catch (ResourceAccessException e) {
-      throw new ResourceInitializationException(e);
-    }
-
-    sentenceDetector = new SentenceDetectorME(model);
-  }
-
-  /**
-   * Initializes the type system.
-   */
-  public void typeSystemInit(TypeSystem typeSystem)
-      throws AnalysisEngineProcessException {
-
-    super.typeSystemInit(typeSystem);
-
-    probabilityFeature = AnnotatorUtil.getOptionalFeatureParameter(context,
-        sentenceType, UimaUtil.PROBABILITY_FEATURE_PARAMETER,
-        CAS.TYPE_NAME_DOUBLE);
-  }
-
-  @Override
-  protected Span[] detectSentences(String text) {
-    return sentenceDetector.sentPosDetect(text);
-  }
-  
-  @Override
-  protected void postProcessAnnotations(AnnotationFS sentences[]) {
-    
-    if (probabilityFeature != null) {
-      double sentenceProbabilities[] = sentenceDetector.getSentenceProbabilities(); 
-      
-      for (int i = 0; i < sentences.length; i++) {
-        sentences[i].setDoubleValue(probabilityFeature, sentenceProbabilities[i]);
-      }
-    }
-  }
-  
-  /**
-   * Releases allocated resources.
-   */
-  public void destroy() {
-    // dereference model to allow garbage collection 
-    sentenceDetector = null;
-  }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ 
+
+package opennlp.uima.sentdetect;
+
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.util.Span;
+import opennlp.uima.util.AnnotatorUtil;
+import opennlp.uima.util.UimaUtil;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.resource.ResourceAccessException;
+import org.apache.uima.resource.ResourceInitializationException;
+
+/**
+ * OpenNLP Sentence annotator.
+ * <p>
+ * Mandatory parameters
+ * <table border=1>
+ *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
+ * </table>
+ * <p>  
+ * Optional parameters
+ * <table border=1>
+ *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ * 	 <tr><td>String</td> <td>opennlp.uima.ContainerType</td> <td>The name of the container type</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.ProbabilityFeature</td> <td>The name of the double
+ *       probability feature (not set by default)</td></tr>
+ * </table>
+ */
+public final class SentenceDetector extends AbstractSentenceDetector {
+
+  /**
+   * OpenNLP sentence detector.
+   */
+  private SentenceDetectorME sentenceDetector;
+
+  private Feature probabilityFeature;
+  
+  /**
+   * Initializes a new instance.
+   *
+   * Note: Use {@link #initialize(UimaContext) } to initialize 
+   * this instance. Do not use the constructor.
+   */
+  public SentenceDetector() {
+    // must not be implemented !
+  }
+  
+  /**
+   * Initializes the current instance with the given context.
+   * 
+   * Note: Do all initialization in this method, do not use the constructor.
+   */
+  public void initialize(UimaContext context)
+      throws ResourceInitializationException {
+
+    super.initialize(context);
+
+    SentenceModel model;
+
+    try {
+      SentenceModelResource modelResource = (SentenceModelResource) context
+          .getResourceObject(UimaUtil.MODEL_PARAMETER);
+
+      model = modelResource.getModel();
+    } catch (ResourceAccessException e) {
+      throw new ResourceInitializationException(e);
+    }
+
+    sentenceDetector = new SentenceDetectorME(model);
+  }
+
+  /**
+   * Initializes the type system.
+   */
+  public void typeSystemInit(TypeSystem typeSystem)
+      throws AnalysisEngineProcessException {
+
+    super.typeSystemInit(typeSystem);
+
+    probabilityFeature = AnnotatorUtil.getOptionalFeatureParameter(context,
+        sentenceType, UimaUtil.PROBABILITY_FEATURE_PARAMETER,
+        CAS.TYPE_NAME_DOUBLE);
+  }
+
+  @Override
+  protected Span[] detectSentences(String text) {
+    return sentenceDetector.sentPosDetect(text);
+  }
+  
+  @Override
+  protected void postProcessAnnotations(AnnotationFS sentences[]) {
+    
+    if (probabilityFeature != null) {
+      double sentenceProbabilities[] = sentenceDetector.getSentenceProbabilities(); 
+      
+      for (int i = 0; i < sentences.length; i++) {
+        sentences[i].setDoubleValue(probabilityFeature, sentenceProbabilities[i]);
+      }
+    }
+  }
+  
+  /**
+   * Releases allocated resources.
+   */
+  public void destroy() {
+    // dereference model to allow garbage collection 
+    sentenceDetector = null;
+  }
+}

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java?rev=1198880&r1=1198879&r2=1198880&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java (original)
+++ incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java Mon Nov  7 19:07:33 2011
@@ -1,163 +1,163 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreemnets.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.sentdetect;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import opennlp.maxent.GIS;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.util.ObjectStreamUtils;
-import opennlp.tools.util.Span;
-import opennlp.uima.util.CasConsumerUtil;
-import opennlp.uima.util.OpennlpUtil;
-import opennlp.uima.util.UimaUtil;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
-/**
- * OpenNLP SentenceDetector trainer.
- * <p>
- * Mandatory parameters
- * <table border=1>
- *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- *   <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- * </table>
- */
-public final class SentenceDetectorTrainer extends CasConsumer_ImplBase {
-  
-  private List<SentenceSample> sentenceSamples = new ArrayList<SentenceSample>();
-
-  private Type mSentenceType;
-
-  private String mModelName;
-  
-  private String language = "en";
-  
-  private Logger mLogger;
-
-  private UimaContext mContext;
-  
-  /**
-   * Initializes the current instance.
-   */
-  public void initialize() throws ResourceInitializationException {
-    
-    super.initialize();
-    
-    mContext = getUimaContext();
-    
-    mLogger = mContext.getLogger();
-    
-    if (mLogger.isLoggable(Level.INFO)) {
-      mLogger.log(Level.INFO, "Initializing the OpenNLP SentenceDetector " +
-          "trainer.");
-    } 
-    
-    mModelName = CasConsumerUtil.getRequiredStringParameter(mContext, 
-        UimaUtil.MODEL_PARAMETER);
-    
-    language = CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.LANGUAGE_PARAMETER);
-  }
-  
-  /**
-   * Initializes the current instance with the given type system.
-   */
-  public void typeSystemInit(TypeSystem typeSystem)
-      throws ResourceInitializationException {
-    
-    String sentenceTypeName = 
-        CasConsumerUtil.getRequiredStringParameter(mContext,
-        UimaUtil.SENTENCE_TYPE_PARAMETER);
-    
-    mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
-  }
-
-  /**
-   * Process the given CAS object.
-   */
-  public void processCas(CAS cas) {
-
-    FSIndex<AnnotationFS> sentenceIndex = cas.getAnnotationIndex(mSentenceType);
-
-    Span[] sentSpans = new Span[sentenceIndex.size()];
-
-    int i = 0;
-    Iterator<AnnotationFS> sentenceIterator = sentenceIndex.iterator();
-    while (sentenceIterator.hasNext()) {
-      AnnotationFS sentenceAnnotation = (AnnotationFS) sentenceIterator.next();
-
-      sentSpans[i++] = new Span(sentenceAnnotation.getBegin(), sentenceAnnotation.getEnd());
-    }
-
-    sentenceSamples.add(new SentenceSample(cas.getDocumentText(), sentSpans));
-  }
-
-  /**
-   * Called if the processing is finished, this method
-   * does the training.
-   */
-  public void collectionProcessComplete(ProcessTrace trace)
-      throws ResourceProcessException, IOException {
-    GIS.PRINT_MESSAGES = false;
-
-    SentenceModel sentenceModel = SentenceDetectorME.train(language,
-        ObjectStreamUtils.createObjectStream(sentenceSamples), true, null);
-    
-    // dereference to allow garbage collection
-    sentenceSamples = null;
-    
-    File modelFile = new File(getUimaContextAdmin().getResourceManager()
-        .getDataPath() + File.separatorChar + mModelName);
-
-    OpennlpUtil.serialize(sentenceModel,modelFile);
-  }
-
-  /**
-   * The trainer is not stateless.
-   */
-  public boolean isStateless() {
-    return false;
-  }
-  
-  /**
-   * Releases allocated resources.
-   */
-  public void destroy() {
-    // dereference to allow garbage collection
-    sentenceSamples = null;
-  }
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.sentdetect;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import opennlp.maxent.GIS;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.sentdetect.SentenceSample;
+import opennlp.tools.util.ObjectStreamUtils;
+import opennlp.tools.util.Span;
+import opennlp.uima.util.CasConsumerUtil;
+import opennlp.uima.util.OpennlpUtil;
+import opennlp.uima.util.UimaUtil;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIndex;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.ProcessTrace;
+
+/**
+ * OpenNLP SentenceDetector trainer.
+ * <p>
+ * Mandatory parameters
+ * <table border=1>
+ *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
+ * </table>
+ */
+public final class SentenceDetectorTrainer extends CasConsumer_ImplBase {
+  
+  private List<SentenceSample> sentenceSamples = new ArrayList<SentenceSample>();
+
+  private Type mSentenceType;
+
+  private String mModelName;
+  
+  private String language = "en";
+  
+  private Logger mLogger;
+
+  private UimaContext mContext;
+  
+  /**
+   * Initializes the current instance.
+   */
+  public void initialize() throws ResourceInitializationException {
+    
+    super.initialize();
+    
+    mContext = getUimaContext();
+    
+    mLogger = mContext.getLogger();
+    
+    if (mLogger.isLoggable(Level.INFO)) {
+      mLogger.log(Level.INFO, "Initializing the OpenNLP SentenceDetector " +
+          "trainer.");
+    } 
+    
+    mModelName = CasConsumerUtil.getRequiredStringParameter(mContext, 
+        UimaUtil.MODEL_PARAMETER);
+    
+    language = CasConsumerUtil.getRequiredStringParameter(mContext,
+        UimaUtil.LANGUAGE_PARAMETER);
+  }
+  
+  /**
+   * Initializes the current instance with the given type system.
+   */
+  public void typeSystemInit(TypeSystem typeSystem)
+      throws ResourceInitializationException {
+    
+    String sentenceTypeName = 
+        CasConsumerUtil.getRequiredStringParameter(mContext,
+        UimaUtil.SENTENCE_TYPE_PARAMETER);
+    
+    mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
+  }
+
+  /**
+   * Process the given CAS object.
+   */
+  public void processCas(CAS cas) {
+
+    FSIndex<AnnotationFS> sentenceIndex = cas.getAnnotationIndex(mSentenceType);
+
+    Span[] sentSpans = new Span[sentenceIndex.size()];
+
+    int i = 0;
+    Iterator<AnnotationFS> sentenceIterator = sentenceIndex.iterator();
+    while (sentenceIterator.hasNext()) {
+      AnnotationFS sentenceAnnotation = (AnnotationFS) sentenceIterator.next();
+
+      sentSpans[i++] = new Span(sentenceAnnotation.getBegin(), sentenceAnnotation.getEnd());
+    }
+
+    sentenceSamples.add(new SentenceSample(cas.getDocumentText(), sentSpans));
+  }
+
+  /**
+   * Called when processing is finished; this method
+   * does the training.
+   */
+  public void collectionProcessComplete(ProcessTrace trace)
+      throws ResourceProcessException, IOException {
+    GIS.PRINT_MESSAGES = false;
+
+    SentenceModel sentenceModel = SentenceDetectorME.train(language,
+        ObjectStreamUtils.createObjectStream(sentenceSamples), true, null);
+    
+    // dereference to allow garbage collection
+    sentenceSamples = null;
+    
+    File modelFile = new File(getUimaContextAdmin().getResourceManager()
+        .getDataPath() + File.separatorChar + mModelName);
+
+    OpennlpUtil.serialize(sentenceModel,modelFile);
+  }
+
+  /**
+   * The trainer is not stateless.
+   */
+  public boolean isStateless() {
+    return false;
+  }
+  
+  /**
+   * Releases allocated resources.
+   */
+  public void destroy() {
+    // dereference to allow garbage collection
+    sentenceSamples = null;
+  }
 }
\ No newline at end of file

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResource.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/package.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java?rev=1198880&r1=1198879&r2=1198880&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java (original)
+++ incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java Mon Nov  7 19:07:33 2011
@@ -1,59 +1,59 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreemnets.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.tokenize;
-
-import opennlp.tools.util.Span;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.text.AnnotationFS;
-
-/**
- * OpenNLP Simple Tokenizer annotator.
- * <p>
- * Mandatory parameters
- * <table border=1>
- *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
- * </table>
- */
-public final class SimpleTokenizer extends AbstractTokenizer {
-  
-  /**
-   * The OpenNLP simple tokenizer.
-   */
-  private opennlp.tools.tokenize.SimpleTokenizer tokenizer = 
-      opennlp.tools.tokenize.SimpleTokenizer.INSTANCE;
-
-  /**
-   * Initializes the current instance.
-   *
-   * Note: Use {@link #initialize(UimaContext) } to initialize 
-   * this instance. Not use the constructor.
-   */
-  public SimpleTokenizer() {
-	  super("OpenNLP Simple Tokenizer");
-    // must not be implemented !
-  }
-  
-  @Override
-  protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
-    return tokenizer.tokenizePos(sentence.getCoveredText());
-  }
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.tokenize;
+
+import opennlp.tools.util.Span;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.text.AnnotationFS;
+
+/**
+ * OpenNLP Simple Tokenizer annotator.
+ * <p>
+ * Mandatory parameters
+ * <table border=1>
+ *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
+ * </table>
+ */
+public final class SimpleTokenizer extends AbstractTokenizer {
+  
+  /**
+   * The OpenNLP simple tokenizer.
+   */
+  private opennlp.tools.tokenize.SimpleTokenizer tokenizer = 
+      opennlp.tools.tokenize.SimpleTokenizer.INSTANCE;
+
+  /**
+   * Initializes the current instance.
+   *
+   * Note: Use {@link #initialize(UimaContext) } to initialize 
+   * this instance. Do not use the constructor.
+   */
+  public SimpleTokenizer() {
+	  super("OpenNLP Simple Tokenizer");
+    // must not be implemented !
+  }
+  
+  @Override
+  protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
+    return tokenizer.tokenizePos(sentence.getCoveredText());
+  }
 }
\ No newline at end of file

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java?rev=1198880&r1=1198879&r2=1198880&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java (original)
+++ incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java Mon Nov  7 19:07:33 2011
@@ -1,139 +1,139 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreemnets.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.tokenize;
-
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.tokenize.TokenizerModel;
-import opennlp.tools.util.Span;
-import opennlp.uima.util.AnnotatorUtil;
-import opennlp.uima.util.UimaUtil;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.resource.ResourceAccessException;
-import org.apache.uima.resource.ResourceInitializationException;
-
-/**
- * OpenNLP Tokenizer annotator.
- * <p>
- * Mandatory parameters
- * <table border=1>
- *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- *   <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- *   <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
- * </table>
- * <p>
- * Optional parameters
- * <table border=1>
- *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- *   <tr><td>String</td> <td>opennlp.uima.ProbabilityFeature</td> <td>The name of the double 
- *       probability feature (not set by default)</td>
- * </table>
- * @see {@link TokenizerME}
- */
-public final class Tokenizer extends AbstractTokenizer {
-  
-  /**
-   * The OpenNLP tokenizer.
-   */
-  private TokenizerME tokenizer;
-  
-  private Feature probabilityFeature;
-  
-  /**
-   * Initializes a new instance.
-   *
-   * Note: Use {@link #initialize(UimaContext) } to initialize 
-   * this instance. Not use the constructor.
-   */
-  public Tokenizer() {
-    super("OpenNLP Tokenizer");
-	  
-    // must not be implemented !
-  }
-  
-  /**
-   * Initializes the current instance with the given context.
-   * 
-   * Note: Do all initialization in this method, do not use the constructor.
-   */
-  public void initialize(UimaContext context)
-      throws ResourceInitializationException {
-
-    super.initialize(context);
-
-    TokenizerModel model;
-
-    try {
-      TokenizerModelResource modelResource = (TokenizerModelResource) context
-          .getResourceObject(UimaUtil.MODEL_PARAMETER);
-
-      model = modelResource.getModel();
-    } catch (ResourceAccessException e) {
-      throw new ResourceInitializationException(e);
-    }
-
-    tokenizer = new TokenizerME(model);
-  }
-
-  /**
-   * Initializes the type system.
-   */
-  public void typeSystemInit(TypeSystem typeSystem)
-      throws AnalysisEngineProcessException {
-
-    super.typeSystemInit(typeSystem);
-
-    probabilityFeature = AnnotatorUtil
-        .getOptionalFeatureParameter(context, tokenType,
-            UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE);
-  }
-
-  
-  @Override
-  protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
-    return tokenizer.tokenizePos(sentence.getCoveredText());
-  }
-  
-  @Override
-  protected void postProcessAnnotations(Span[] tokens,
-      AnnotationFS[] tokenAnnotations) {
-    // if interest
-    if (probabilityFeature != null) {
-      double tokenProbabilties[] = tokenizer.getTokenProbabilities();
-
-      for (int i = 0; i < tokenAnnotations.length; i++) {
-        tokenAnnotations[i].setDoubleValue(probabilityFeature,
-            tokenProbabilties[i]);
-      }
-    }
-  }
-  
-  /**
-   * Releases allocated resources.
-   */
-  public void destroy() {
-    // dereference model to allow garbage collection 
-    tokenizer = null;
-  }
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.tokenize;
+
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.Span;
+import opennlp.uima.util.AnnotatorUtil;
+import opennlp.uima.util.UimaUtil;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.resource.ResourceAccessException;
+import org.apache.uima.resource.ResourceInitializationException;
+
+/**
+ * OpenNLP Tokenizer annotator.
+ * <p>
+ * Mandatory parameters
+ * <table border=1>
+ *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
+ * </table>
+ * <p>
+ * Optional parameters
+ * <table border=1>
+ *   <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ *   <tr><td>String</td> <td>opennlp.uima.ProbabilityFeature</td> <td>The name of the double 
+ *       probability feature (not set by default)</td></tr>
+ * </table>
+ * @see TokenizerME
+ */
+public final class Tokenizer extends AbstractTokenizer {
+  
+  /**
+   * The OpenNLP tokenizer.
+   */
+  private TokenizerME tokenizer;
+  
+  private Feature probabilityFeature;
+  
+  /**
+   * Initializes a new instance.
+   *
+   * Note: Use {@link #initialize(UimaContext) } to initialize 
+   * this instance. Do not use the constructor.
+   */
+  public Tokenizer() {
+    super("OpenNLP Tokenizer");
+	  
+    // must not be implemented !
+  }
+  
+  /**
+   * Initializes the current instance with the given context.
+   * 
+   * Note: Do all initialization in this method, do not use the constructor.
+   */
+  public void initialize(UimaContext context)
+      throws ResourceInitializationException {
+
+    super.initialize(context);
+
+    TokenizerModel model;
+
+    try {
+      TokenizerModelResource modelResource = (TokenizerModelResource) context
+          .getResourceObject(UimaUtil.MODEL_PARAMETER);
+
+      model = modelResource.getModel();
+    } catch (ResourceAccessException e) {
+      throw new ResourceInitializationException(e);
+    }
+
+    tokenizer = new TokenizerME(model);
+  }
+
+  /**
+   * Initializes the type system.
+   */
+  public void typeSystemInit(TypeSystem typeSystem)
+      throws AnalysisEngineProcessException {
+
+    super.typeSystemInit(typeSystem);
+
+    probabilityFeature = AnnotatorUtil
+        .getOptionalFeatureParameter(context, tokenType,
+            UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE);
+  }
+
+  
+  @Override
+  protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
+    return tokenizer.tokenizePos(sentence.getCoveredText());
+  }
+  
+  @Override
+  protected void postProcessAnnotations(Span[] tokens,
+      AnnotationFS[] tokenAnnotations) {
+    // write probabilities only if the optional probability feature is set
+    if (probabilityFeature != null) {
+      double tokenProbabilties[] = tokenizer.getTokenProbabilities();
+
+      for (int i = 0; i < tokenAnnotations.length; i++) {
+        tokenAnnotations[i].setDoubleValue(probabilityFeature,
+            tokenProbabilties[i]);
+      }
+    }
+  }
+  
+  /**
+   * Releases allocated resources.
+   */
+  public void destroy() {
+    // dereference model to allow garbage collection 
+    tokenizer = null;
+  }
 }
\ No newline at end of file

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerModelResource.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerModelResourceImpl.java
------------------------------------------------------------------------------
    svn:eol-style = native