You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2011/11/07 20:07:50 UTC
svn commit: r1198880 [5/7] - in /incubator/opennlp/trunk: ./ opennlp-distr/
opennlp-distr/src/main/assembly/ opennlp-distr/src/main/readme/
opennlp-docs/ opennlp-docs/src/docbkx/ opennlp-docs/src/docbkx/css/
opennlp-docs/src/main/resources/xsl/ opennlp...
Modified: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java?rev=1198880&r1=1198879&r2=1198880&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java (original)
+++ incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java Mon Nov 7 19:07:33 2011
@@ -1,346 +1,346 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreemnets. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.parser;
-
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
-import opennlp.tools.parser.Parse;
-import opennlp.tools.parser.ParserFactory;
-import opennlp.tools.parser.ParserModel;
-import opennlp.tools.util.Span;
-import opennlp.uima.util.AnnotatorUtil;
-import opennlp.uima.util.ContainingConstraint;
-import opennlp.uima.util.UimaUtil;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.resource.ResourceAccessException;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-
-/**
- * Abstract base class for OpenNLP Parser annotators.
- * <p>
- * Mandatory parameters
- * <table border=1>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.ParseType</td> <td>The full name of the parse type</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.TypeFeature</td> <td>The name of the type feature</td></tr>
- * </table>
- * <p>
- * Optional parameters
- * <table border=1>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>Integer</td> <td>opennlp.uima.BeamSize</td></tr>
- * </table>
- */
-public class Parser extends CasAnnotator_ImplBase {
-
- private static class ParseConverter {
- private Map<Integer, Integer> mIndexMap = new HashMap<Integer, Integer>();
-
- private Parse mParseForTagger;
-
- private String mSentence;
-
- /**
- * Initializes a new instance.
- *
- * @param sentence
- * @param tokens
- */
- public ParseConverter(String sentence, Span tokens[]) {
-
- mSentence = sentence;
-
- StringBuilder sentenceStringBuilder = new StringBuilder();
-
- String tokenList[] = new String[tokens.length];
-
- for (int i = 0; i < tokens.length; i++) {
- String tokenString = tokens[i].getCoveredText(sentence).toString();
- String escapedToken = escape(tokenString);
- tokenList[i] = escapedToken;
-
- int escapedStart = sentenceStringBuilder.length();
- int start = tokens[i].getStart();
- mIndexMap.put(new Integer(escapedStart), new Integer(start));
-
- int escapedEnd = escapedStart + escapedToken.length();
- int end = tokens[i].getEnd();
- mIndexMap.put(new Integer(escapedEnd), new Integer(end));
-
- sentenceStringBuilder.append(tokenList[i]);
-
- sentenceStringBuilder.append(' ');
- }
-
- // remove last space
- sentenceStringBuilder.setLength(sentenceStringBuilder.length() - 1);
-
- String tokenizedSentence = sentenceStringBuilder.toString();
-
- mParseForTagger = new Parse(tokenizedSentence,
- new Span(0, tokenizedSentence.length()), "INC", 1, null);
-
- int start = 0;
-
- for (int i = 0; i < tokenList.length; i++) {
-
- mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start,
- start + tokenList[i].length()),
- opennlp.tools.parser.chunking.Parser.TOK_NODE, 0f, 0));
-
- start += tokenList[i].length() + 1;
- }
- }
-
- private static String escape(String text) {
- return text;
- }
-
- /**
- * Creates the parse for the tagger.
- *
- * @return the parse which can be passed to the tagger
- */
- Parse getParseForTagger() {
- return mParseForTagger;
- }
-
- /**
- * Converts the parse from the tagger back.
- *
- * @param parseFromTagger
- * @return the final parse
- */
- Parse transformParseFromTagger(Parse parseFromTagger) {
- int start = parseFromTagger.getSpan().getStart();
- int end = parseFromTagger.getSpan().getEnd();
-
-
- Parse transformedParse = new Parse(mSentence,
- new Span(((Integer) mIndexMap.get(new Integer(start))).intValue(),
- ((Integer) mIndexMap.get(new Integer(end))).intValue()),
- parseFromTagger.getType(),
- parseFromTagger.getProb(), parseFromTagger.getHeadIndex());
-
-
- Parse[] parseFromTaggerChildrens = parseFromTagger.getChildren();
-
- // call this method for all childs ...
- for (int i = 0; i < parseFromTaggerChildrens.length; i++) {
-
- Parse child = parseFromTaggerChildrens[i];
-
- if (!child.getType().equals(
- opennlp.tools.parser.chunking.Parser.TOK_NODE)) {
-
- // only insert if it has childs
- if (child.getChildCount() > 0 &&
- !child.getChildren()[0].getType().equals(opennlp.tools.parser.chunking.Parser.TOK_NODE)) {
- transformedParse.insert(transformParseFromTagger(child));
- }
- }
- }
-
- if (parseFromTagger.getType().equals("TOP")) {
- return transformedParse.getChildren()[0];
- }
- else {
- return transformedParse;
- }
- }
-
- }
-
- private static final String PARSE_TYPE_PARAMETER = "opennlp.uima.ParseType";
-
- public static final String TYPE_FEATURE_PARAMETER =
- "opennlp.uima.TypeFeature";
-
- protected UimaContext context;
-
- protected Logger mLogger;
-
- private Type mSentenceType;
-
- private Type mTokenType;
-
- protected opennlp.tools.parser.Parser mParser;
-
- private Type mParseType;
-
- private Feature mTypeFeature;
-
- /**
- * Initializes the current instance with the given context.
- */
- public void initialize(UimaContext context)
- throws ResourceInitializationException {
-
- super.initialize(context);
-
- this.context = context;
-
- mLogger = context.getLogger();
-
- if (mLogger.isLoggable(Level.INFO)) {
- mLogger.log(Level.INFO, "Initializing the OpenNLP Parser.");
- }
-
- ParserModel model;
-
- try {
- ParserModelResource modelResource = (ParserModelResource) context
- .getResourceObject(UimaUtil.MODEL_PARAMETER);
-
- model = modelResource.getModel();
- } catch (ResourceAccessException e) {
- throw new ResourceInitializationException(e);
- }
-
- mParser = ParserFactory.create(model);
- }
-
- /**
- * Initializes the type system.
- */
- public void typeSystemInit(TypeSystem typeSystem)
- throws AnalysisEngineProcessException {
-
- mSentenceType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
- UimaUtil.SENTENCE_TYPE_PARAMETER);
-
- mTokenType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
- UimaUtil.TOKEN_TYPE_PARAMETER);
-
- mParseType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
- PARSE_TYPE_PARAMETER);
-
- mTypeFeature = AnnotatorUtil.getRequiredFeatureParameter(context,
- mParseType, TYPE_FEATURE_PARAMETER, CAS.TYPE_NAME_STRING);
- }
-
- /**
- * Performs parsing on the given {@link CAS} object.
- */
- public void process(CAS cas) {
- FSIndex<AnnotationFS> sentences = cas.getAnnotationIndex(mSentenceType);
-
- Iterator<AnnotationFS> sentencesIterator = sentences.iterator();
-
- while (sentencesIterator.hasNext()) {
- AnnotationFS sentence = (AnnotationFS) sentencesIterator.next();
-
- process(cas, sentence);
- }
- }
-
- protected void process(CAS cas, AnnotationFS sentenceAnnotation) {
- FSIndex<AnnotationFS> allTokens = cas.getAnnotationIndex(mTokenType);
-
- ContainingConstraint containingConstraint =
- new ContainingConstraint(sentenceAnnotation);
-
- Iterator<AnnotationFS> containingTokens = cas.createFilteredIterator(
- allTokens.iterator(), containingConstraint);
-
- StringBuilder sentenceStringBuilder = new StringBuilder();
-
- while (containingTokens.hasNext()) {
- AnnotationFS token = (AnnotationFS) containingTokens.next();
-
- sentenceStringBuilder.append(token.getCoveredText());
-
- // attention the offsets moves inside the sentence...
- sentenceStringBuilder.append(' ');
- }
-
- String sentence = sentenceStringBuilder.toString();
- sentence = sentenceAnnotation.getCoveredText();
-
- containingTokens = cas.createFilteredIterator(
- allTokens.iterator(), containingConstraint);
-
- List<Span> tokenSpans = new LinkedList<Span>();
-
- while(containingTokens.hasNext()) {
- AnnotationFS token = (AnnotationFS) containingTokens.next();
-
- tokenSpans.add(new Span(token.getBegin() - sentenceAnnotation.getBegin(),
- token.getEnd() - sentenceAnnotation.getBegin()));
- }
-
- ParseConverter converter = new ParseConverter(sentence,(Span[])
- tokenSpans.toArray(new Span[tokenSpans.size()]));
-
- Parse parse = mParser.parse(converter.getParseForTagger());
-
- parse = converter.transformParseFromTagger(parse);
-
- if (mLogger.isLoggable(Level.INFO)) {
- StringBuffer parseString = new StringBuffer();
- parse.show(parseString);
-
- mLogger.log(Level.INFO, parseString.toString());
- }
-
- createAnnotation(cas, sentenceAnnotation.getBegin(), parse);
- }
-
- protected void createAnnotation(CAS cas, int offset, Parse parse) {
-
- Parse parseChildrens[] = parse.getChildren();
-
- // do this for all children
- for (int i = 0; i < parseChildrens.length; i++) {
- Parse child = parseChildrens[i];
- createAnnotation(cas, offset, child);
- }
-
- AnnotationFS parseAnnotation = cas.createAnnotation(mParseType, offset +
- parse.getSpan().getStart(), offset + parse.getSpan().getEnd());
-
- parseAnnotation.setStringValue(mTypeFeature, parse.getType());
-
- cas.getIndexRepository().addFS(parseAnnotation);
- }
-
- /**
- * Releases allocated resources.
- */
- public void destroy() {
- mParser = null;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreemnets. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.parser;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+
+import opennlp.tools.parser.Parse;
+import opennlp.tools.parser.ParserFactory;
+import opennlp.tools.parser.ParserModel;
+import opennlp.tools.util.Span;
+import opennlp.uima.util.AnnotatorUtil;
+import opennlp.uima.util.ContainingConstraint;
+import opennlp.uima.util.UimaUtil;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIndex;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.resource.ResourceAccessException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+
+/**
+ * Abstract base class for OpenNLP Parser annotators.
+ * <p>
+ * Mandatory parameters
+ * <table border=1>
+ * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.ParseType</td> <td>The full name of the parse type</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.TypeFeature</td> <td>The name of the type feature</td></tr>
+ * </table>
+ * <p>
+ * Optional parameters
+ * <table border=1>
+ * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ * <tr><td>Integer</td> <td>opennlp.uima.BeamSize</td></tr>
+ * </table>
+ */
+public class Parser extends CasAnnotator_ImplBase {
+
+ private static class ParseConverter {
+ private Map<Integer, Integer> mIndexMap = new HashMap<Integer, Integer>();
+
+ private Parse mParseForTagger;
+
+ private String mSentence;
+
+ /**
+ * Initializes a new instance.
+ *
+ * @param sentence
+ * @param tokens
+ */
+ public ParseConverter(String sentence, Span tokens[]) {
+
+ mSentence = sentence;
+
+ StringBuilder sentenceStringBuilder = new StringBuilder();
+
+ String tokenList[] = new String[tokens.length];
+
+ for (int i = 0; i < tokens.length; i++) {
+ String tokenString = tokens[i].getCoveredText(sentence).toString();
+ String escapedToken = escape(tokenString);
+ tokenList[i] = escapedToken;
+
+ int escapedStart = sentenceStringBuilder.length();
+ int start = tokens[i].getStart();
+ mIndexMap.put(new Integer(escapedStart), new Integer(start));
+
+ int escapedEnd = escapedStart + escapedToken.length();
+ int end = tokens[i].getEnd();
+ mIndexMap.put(new Integer(escapedEnd), new Integer(end));
+
+ sentenceStringBuilder.append(tokenList[i]);
+
+ sentenceStringBuilder.append(' ');
+ }
+
+ // remove last space
+ sentenceStringBuilder.setLength(sentenceStringBuilder.length() - 1);
+
+ String tokenizedSentence = sentenceStringBuilder.toString();
+
+ mParseForTagger = new Parse(tokenizedSentence,
+ new Span(0, tokenizedSentence.length()), "INC", 1, null);
+
+ int start = 0;
+
+ for (int i = 0; i < tokenList.length; i++) {
+
+ mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start,
+ start + tokenList[i].length()),
+ opennlp.tools.parser.chunking.Parser.TOK_NODE, 0f, 0));
+
+ start += tokenList[i].length() + 1;
+ }
+ }
+
+ private static String escape(String text) {
+ return text;
+ }
+
+ /**
+ * Creates the parse for the tagger.
+ *
+ * @return the parse which can be passed to the tagger
+ */
+ Parse getParseForTagger() {
+ return mParseForTagger;
+ }
+
+ /**
+ * Converts the parse from the tagger back.
+ *
+ * @param parseFromTagger
+ * @return the final parse
+ */
+ Parse transformParseFromTagger(Parse parseFromTagger) {
+ int start = parseFromTagger.getSpan().getStart();
+ int end = parseFromTagger.getSpan().getEnd();
+
+
+ Parse transformedParse = new Parse(mSentence,
+ new Span(((Integer) mIndexMap.get(new Integer(start))).intValue(),
+ ((Integer) mIndexMap.get(new Integer(end))).intValue()),
+ parseFromTagger.getType(),
+ parseFromTagger.getProb(), parseFromTagger.getHeadIndex());
+
+
+ Parse[] parseFromTaggerChildrens = parseFromTagger.getChildren();
+
+ // call this method for all childs ...
+ for (int i = 0; i < parseFromTaggerChildrens.length; i++) {
+
+ Parse child = parseFromTaggerChildrens[i];
+
+ if (!child.getType().equals(
+ opennlp.tools.parser.chunking.Parser.TOK_NODE)) {
+
+ // only insert if it has childs
+ if (child.getChildCount() > 0 &&
+ !child.getChildren()[0].getType().equals(opennlp.tools.parser.chunking.Parser.TOK_NODE)) {
+ transformedParse.insert(transformParseFromTagger(child));
+ }
+ }
+ }
+
+ if (parseFromTagger.getType().equals("TOP")) {
+ return transformedParse.getChildren()[0];
+ }
+ else {
+ return transformedParse;
+ }
+ }
+
+ }
+
+ private static final String PARSE_TYPE_PARAMETER = "opennlp.uima.ParseType";
+
+ public static final String TYPE_FEATURE_PARAMETER =
+ "opennlp.uima.TypeFeature";
+
+ protected UimaContext context;
+
+ protected Logger mLogger;
+
+ private Type mSentenceType;
+
+ private Type mTokenType;
+
+ protected opennlp.tools.parser.Parser mParser;
+
+ private Type mParseType;
+
+ private Feature mTypeFeature;
+
+ /**
+ * Initializes the current instance with the given context.
+ */
+ public void initialize(UimaContext context)
+ throws ResourceInitializationException {
+
+ super.initialize(context);
+
+ this.context = context;
+
+ mLogger = context.getLogger();
+
+ if (mLogger.isLoggable(Level.INFO)) {
+ mLogger.log(Level.INFO, "Initializing the OpenNLP Parser.");
+ }
+
+ ParserModel model;
+
+ try {
+ ParserModelResource modelResource = (ParserModelResource) context
+ .getResourceObject(UimaUtil.MODEL_PARAMETER);
+
+ model = modelResource.getModel();
+ } catch (ResourceAccessException e) {
+ throw new ResourceInitializationException(e);
+ }
+
+ mParser = ParserFactory.create(model);
+ }
+
+ /**
+ * Initializes the type system.
+ */
+ public void typeSystemInit(TypeSystem typeSystem)
+ throws AnalysisEngineProcessException {
+
+ mSentenceType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
+ UimaUtil.SENTENCE_TYPE_PARAMETER);
+
+ mTokenType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
+ UimaUtil.TOKEN_TYPE_PARAMETER);
+
+ mParseType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
+ PARSE_TYPE_PARAMETER);
+
+ mTypeFeature = AnnotatorUtil.getRequiredFeatureParameter(context,
+ mParseType, TYPE_FEATURE_PARAMETER, CAS.TYPE_NAME_STRING);
+ }
+
+ /**
+ * Performs parsing on the given {@link CAS} object.
+ */
+ public void process(CAS cas) {
+ FSIndex<AnnotationFS> sentences = cas.getAnnotationIndex(mSentenceType);
+
+ Iterator<AnnotationFS> sentencesIterator = sentences.iterator();
+
+ while (sentencesIterator.hasNext()) {
+ AnnotationFS sentence = (AnnotationFS) sentencesIterator.next();
+
+ process(cas, sentence);
+ }
+ }
+
+ protected void process(CAS cas, AnnotationFS sentenceAnnotation) {
+ FSIndex<AnnotationFS> allTokens = cas.getAnnotationIndex(mTokenType);
+
+ ContainingConstraint containingConstraint =
+ new ContainingConstraint(sentenceAnnotation);
+
+ Iterator<AnnotationFS> containingTokens = cas.createFilteredIterator(
+ allTokens.iterator(), containingConstraint);
+
+ StringBuilder sentenceStringBuilder = new StringBuilder();
+
+ while (containingTokens.hasNext()) {
+ AnnotationFS token = (AnnotationFS) containingTokens.next();
+
+ sentenceStringBuilder.append(token.getCoveredText());
+
+ // attention the offsets moves inside the sentence...
+ sentenceStringBuilder.append(' ');
+ }
+
+ String sentence = sentenceStringBuilder.toString();
+ sentence = sentenceAnnotation.getCoveredText();
+
+ containingTokens = cas.createFilteredIterator(
+ allTokens.iterator(), containingConstraint);
+
+ List<Span> tokenSpans = new LinkedList<Span>();
+
+ while(containingTokens.hasNext()) {
+ AnnotationFS token = (AnnotationFS) containingTokens.next();
+
+ tokenSpans.add(new Span(token.getBegin() - sentenceAnnotation.getBegin(),
+ token.getEnd() - sentenceAnnotation.getBegin()));
+ }
+
+ ParseConverter converter = new ParseConverter(sentence,(Span[])
+ tokenSpans.toArray(new Span[tokenSpans.size()]));
+
+ Parse parse = mParser.parse(converter.getParseForTagger());
+
+ parse = converter.transformParseFromTagger(parse);
+
+ if (mLogger.isLoggable(Level.INFO)) {
+ StringBuffer parseString = new StringBuffer();
+ parse.show(parseString);
+
+ mLogger.log(Level.INFO, parseString.toString());
+ }
+
+ createAnnotation(cas, sentenceAnnotation.getBegin(), parse);
+ }
+
+ protected void createAnnotation(CAS cas, int offset, Parse parse) {
+
+ Parse parseChildrens[] = parse.getChildren();
+
+ // do this for all children
+ for (int i = 0; i < parseChildrens.length; i++) {
+ Parse child = parseChildrens[i];
+ createAnnotation(cas, offset, child);
+ }
+
+ AnnotationFS parseAnnotation = cas.createAnnotation(mParseType, offset +
+ parse.getSpan().getStart(), offset + parse.getSpan().getEnd());
+
+ parseAnnotation.setStringValue(mTypeFeature, parse.getType());
+
+ cas.getIndexRepository().addFS(parseAnnotation);
+ }
+
+ /**
+ * Releases allocated resources.
+ */
+ public void destroy() {
+ mParser = null;
+ }
+}
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/ParserModelResource.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/ParserModelResourceImpl.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResource.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResourceImpl.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java?rev=1198880&r1=1198879&r2=1198880&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java (original)
+++ incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java Mon Nov 7 19:07:33 2011
@@ -1,242 +1,242 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreemnets. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.postag;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import opennlp.maxent.GIS;
-import opennlp.tools.postag.POSDictionary;
-import opennlp.tools.postag.POSModel;
-import opennlp.tools.postag.POSSample;
-import opennlp.tools.postag.POSTaggerME;
-import opennlp.tools.util.ObjectStreamUtils;
-import opennlp.tools.util.model.ModelType;
-import opennlp.uima.util.AnnotatorUtil;
-import opennlp.uima.util.CasConsumerUtil;
-import opennlp.uima.util.ContainingConstraint;
-import opennlp.uima.util.OpennlpUtil;
-import opennlp.uima.util.UimaUtil;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
-/**
- * OpenNLP POSTagger trainer.
- * <p>
- * Mandatory parameters
- * <table border=1>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
- * <tr><td>String</td> <td>pennlp.uima.POSFeature</td> <td>The name of the token pos feature,
- * the feature must be of type String</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.TagDictionaryName</td></tr>
- * </table>
- */
-public class POSTaggerTrainer extends CasConsumer_ImplBase {
-
- public static final String TAG_DICTIONARY_NAME = "opennlp.uima.TagDictionaryName";
-
- private UimaContext mContext;
-
- private Type mSentenceType;
-
- private Type mTokenType;
-
- private String mModelName;
-
- private Feature mPOSFeature;
-
- private Logger mLogger;
-
- private List<POSSample> mPOSSamples = new ArrayList<POSSample>();
-
- private String language;
-
- private POSDictionary tagDictionary;
-
- /**
- * Initializes the current instance.
- */
- public void initialize() throws ResourceInitializationException {
-
- super.initialize();
-
- mContext = getUimaContext();
-
- mLogger = mContext.getLogger();
-
- if (mLogger.isLoggable(Level.INFO)) {
- mLogger.log(Level.INFO, "Initializing the OpenNLP " +
- "POSTagger trainer.");
- }
-
- mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
- UimaUtil.MODEL_PARAMETER);
-
- language = CasConsumerUtil.getRequiredStringParameter(mContext,
- UimaUtil.LANGUAGE_PARAMETER);
-
- String tagDictionaryName = CasConsumerUtil.getOptionalStringParameter(mContext,
- TAG_DICTIONARY_NAME);
-
- if (tagDictionaryName != null) {
- try {
- InputStream dictIn = AnnotatorUtil.getResourceAsStream(mContext, tagDictionaryName);
-
- // TODO: ask Tom if case sensitivity must be configureable
- tagDictionary = new POSDictionary(new BufferedReader(new InputStreamReader(dictIn)), false);
-
- } catch (final IOException e) {
- // if this fails just print error message and continue
- final String message = "IOException during tag dictionary reading, "
- + "running without tag dictionary: " + e.getMessage();
-
- if (this.mLogger.isLoggable(Level.WARNING)) {
- this.mLogger.log(Level.WARNING, message);
- }
- }
- }
- }
-
- /**
- * Initialize the current instance with the given type system.
- */
- public void typeSystemInit(TypeSystem typeSystem)
- throws ResourceInitializationException {
- String sentenceTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
- UimaUtil.SENTENCE_TYPE_PARAMETER);
-
- if (mLogger.isLoggable(Level.INFO)) {
- mLogger.log(Level.INFO, UimaUtil.SENTENCE_TYPE_PARAMETER + ": " +
- sentenceTypeName);
- }
-
- mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
-
- String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
- UimaUtil.TOKEN_TYPE_PARAMETER);
-
- mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
-
- String posFeatureName = CasConsumerUtil.getRequiredStringParameter(mContext,
- UimaUtil.POS_FEATURE_PARAMETER);
-
- mPOSFeature = mTokenType.getFeatureByBaseName(posFeatureName);
- }
-
- /**
- * Process the given CAS object.
- */
- public void processCas(CAS cas) {
-
- FSIndex<AnnotationFS> sentenceAnnotations = cas.getAnnotationIndex(mSentenceType);
-
- Iterator<AnnotationFS> sentenceAnnotationsIterator = sentenceAnnotations.iterator();
-
- while (sentenceAnnotationsIterator.hasNext()) {
-
- AnnotationFS sentence =
- (AnnotationFS) sentenceAnnotationsIterator.next();
-
- process(cas, sentence);
- }
- }
-
- private void process(CAS tcas, AnnotationFS sentence) {
-
- FSIndex<AnnotationFS> allTokens = tcas.getAnnotationIndex(mTokenType);
-
- ContainingConstraint containingConstraint =
- new ContainingConstraint(sentence);
-
- List<String> tokens = new ArrayList<String>();
- List<String> tags = new ArrayList<String>();
-
- Iterator<AnnotationFS> containingTokens = tcas.createFilteredIterator(
- allTokens.iterator(), containingConstraint);
-
- while (containingTokens.hasNext()) {
-
- AnnotationFS tokenAnnotation = (AnnotationFS) containingTokens.next();
-
- String tag = tokenAnnotation.getFeatureValueAsString(mPOSFeature);
-
- tokens.add(tokenAnnotation.getCoveredText().trim());
- tags.add(tag);
- }
-
- mPOSSamples.add(new POSSample(tokens, tags));
- }
-
- /**
- * Called if the processing is finished, this method
- * does the training.
- */
- public void collectionProcessComplete(ProcessTrace trace)
- throws ResourceProcessException, IOException {
-
- GIS.PRINT_MESSAGES = false;
-
- POSModel posTaggerModel = POSTaggerME.train(language,
- ObjectStreamUtils.createObjectStream(mPOSSamples),
- ModelType.MAXENT, tagDictionary, null, 100, 5);
-
- // dereference to allow garbage collection
- mPOSSamples = null;
-
- File modelFile = new File(getUimaContextAdmin().getResourceManager()
- .getDataPath() + File.separatorChar + mModelName);
-
- OpennlpUtil.serialize(posTaggerModel, modelFile);
- }
-
- /**
- * The trainer is not stateless.
- */
- public boolean isStateless() {
- return false;
- }
-
- /**
- * Releases allocated resources.
- */
- public void destroy() {
- // dereference to allow garbage collection
- mPOSSamples = null;
- }
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreemnets. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.postag;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import opennlp.maxent.GIS;
+import opennlp.tools.postag.POSDictionary;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.postag.POSTaggerME;
+import opennlp.tools.util.ObjectStreamUtils;
+import opennlp.tools.util.model.ModelType;
+import opennlp.uima.util.AnnotatorUtil;
+import opennlp.uima.util.CasConsumerUtil;
+import opennlp.uima.util.ContainingConstraint;
+import opennlp.uima.util.OpennlpUtil;
+import opennlp.uima.util.UimaUtil;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIndex;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.ProcessTrace;
+
+/**
+ * OpenNLP POSTagger trainer.
+ * <p>
+ * Mandatory parameters
+ * <table border=1>
+ * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
+ * <tr><td>String</td> <td>pennlp.uima.POSFeature</td> <td>The name of the token pos feature,
+ * the feature must be of type String</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.TagDictionaryName</td></tr>
+ * </table>
+ */
+public class POSTaggerTrainer extends CasConsumer_ImplBase {
+
+ public static final String TAG_DICTIONARY_NAME = "opennlp.uima.TagDictionaryName";
+
+ private UimaContext mContext;
+
+ private Type mSentenceType;
+
+ private Type mTokenType;
+
+ private String mModelName;
+
+ private Feature mPOSFeature;
+
+ private Logger mLogger;
+
+ private List<POSSample> mPOSSamples = new ArrayList<POSSample>();
+
+ private String language;
+
+ private POSDictionary tagDictionary;
+
+ /**
+ * Initializes the current instance.
+ */
+ public void initialize() throws ResourceInitializationException {
+
+ super.initialize();
+
+ mContext = getUimaContext();
+
+ mLogger = mContext.getLogger();
+
+ if (mLogger.isLoggable(Level.INFO)) {
+ mLogger.log(Level.INFO, "Initializing the OpenNLP " +
+ "POSTagger trainer.");
+ }
+
+ mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
+ UimaUtil.MODEL_PARAMETER);
+
+ language = CasConsumerUtil.getRequiredStringParameter(mContext,
+ UimaUtil.LANGUAGE_PARAMETER);
+
+ String tagDictionaryName = CasConsumerUtil.getOptionalStringParameter(mContext,
+ TAG_DICTIONARY_NAME);
+
+ if (tagDictionaryName != null) {
+ try {
+ InputStream dictIn = AnnotatorUtil.getResourceAsStream(mContext, tagDictionaryName);
+
+ // TODO: ask Tom if case sensitivity must be configureable
+ tagDictionary = new POSDictionary(new BufferedReader(new InputStreamReader(dictIn)), false);
+
+ } catch (final IOException e) {
+ // if this fails just print error message and continue
+ final String message = "IOException during tag dictionary reading, "
+ + "running without tag dictionary: " + e.getMessage();
+
+ if (this.mLogger.isLoggable(Level.WARNING)) {
+ this.mLogger.log(Level.WARNING, message);
+ }
+ }
+ }
+ }
+
+ /**
+ * Initialize the current instance with the given type system.
+ */
+ public void typeSystemInit(TypeSystem typeSystem)
+ throws ResourceInitializationException {
+ String sentenceTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
+ UimaUtil.SENTENCE_TYPE_PARAMETER);
+
+ if (mLogger.isLoggable(Level.INFO)) {
+ mLogger.log(Level.INFO, UimaUtil.SENTENCE_TYPE_PARAMETER + ": " +
+ sentenceTypeName);
+ }
+
+ mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
+
+ String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
+ UimaUtil.TOKEN_TYPE_PARAMETER);
+
+ mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
+
+ String posFeatureName = CasConsumerUtil.getRequiredStringParameter(mContext,
+ UimaUtil.POS_FEATURE_PARAMETER);
+
+ mPOSFeature = mTokenType.getFeatureByBaseName(posFeatureName);
+ }
+
+ /**
+ * Process the given CAS object.
+ */
+ public void processCas(CAS cas) {
+
+ FSIndex<AnnotationFS> sentenceAnnotations = cas.getAnnotationIndex(mSentenceType);
+
+ Iterator<AnnotationFS> sentenceAnnotationsIterator = sentenceAnnotations.iterator();
+
+ while (sentenceAnnotationsIterator.hasNext()) {
+
+ AnnotationFS sentence =
+ (AnnotationFS) sentenceAnnotationsIterator.next();
+
+ process(cas, sentence);
+ }
+ }
+
+ private void process(CAS tcas, AnnotationFS sentence) {
+
+ FSIndex<AnnotationFS> allTokens = tcas.getAnnotationIndex(mTokenType);
+
+ ContainingConstraint containingConstraint =
+ new ContainingConstraint(sentence);
+
+ List<String> tokens = new ArrayList<String>();
+ List<String> tags = new ArrayList<String>();
+
+ Iterator<AnnotationFS> containingTokens = tcas.createFilteredIterator(
+ allTokens.iterator(), containingConstraint);
+
+ while (containingTokens.hasNext()) {
+
+ AnnotationFS tokenAnnotation = (AnnotationFS) containingTokens.next();
+
+ String tag = tokenAnnotation.getFeatureValueAsString(mPOSFeature);
+
+ tokens.add(tokenAnnotation.getCoveredText().trim());
+ tags.add(tag);
+ }
+
+ mPOSSamples.add(new POSSample(tokens, tags));
+ }
+
+ /**
+ * Called if the processing is finished, this method
+ * does the training.
+ */
+ public void collectionProcessComplete(ProcessTrace trace)
+ throws ResourceProcessException, IOException {
+
+ GIS.PRINT_MESSAGES = false;
+
+ POSModel posTaggerModel = POSTaggerME.train(language,
+ ObjectStreamUtils.createObjectStream(mPOSSamples),
+ ModelType.MAXENT, tagDictionary, null, 100, 5);
+
+ // dereference to allow garbage collection
+ mPOSSamples = null;
+
+ File modelFile = new File(getUimaContextAdmin().getResourceManager()
+ .getDataPath() + File.separatorChar + mModelName);
+
+ OpennlpUtil.serialize(posTaggerModel, modelFile);
+ }
+
+ /**
+ * The trainer is not stateless.
+ */
+ public boolean isStateless() {
+ return false;
+ }
+
+ /**
+ * Releases allocated resources.
+ */
+ public void destroy() {
+ // dereference to allow garbage collection
+ mPOSSamples = null;
+ }
}
\ No newline at end of file
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java?rev=1198880&r1=1198879&r2=1198880&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java (original)
+++ incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java Mon Nov 7 19:07:33 2011
@@ -1,133 +1,133 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreemnets. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.sentdetect;
-
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.util.Span;
-import opennlp.uima.util.AnnotatorUtil;
-import opennlp.uima.util.UimaUtil;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.resource.ResourceAccessException;
-import org.apache.uima.resource.ResourceInitializationException;
-
-/**
- * OpenNLP Sentence annotator.
- * <p>
- * Mandatory parameters
- * <table border=1>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- * </table>
- * <p>
- * Optional parameters
- * <table border=1>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.ContainerType</td> <td>The name of the container type</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.ProbabilityFeature</td> <td>The name of the double
- * probability feature (not set by default)</td></tr>
- * </table>
- */
-public final class SentenceDetector extends AbstractSentenceDetector {
-
- /**
- * OpenNLP sentence detector.
- */
- private SentenceDetectorME sentenceDetector;
-
- private Feature probabilityFeature;
-
- /**
- * Initializes a new instance.
- *
- * Note: Use {@link #initialize(UimaContext) } to initialize
- * this instance. Not use the constructor.
- */
- public SentenceDetector() {
- // must not be implemented !
- }
-
- /**
- * Initializes the current instance with the given context.
- *
- * Note: Do all initialization in this method, do not use the constructor.
- */
- public void initialize(UimaContext context)
- throws ResourceInitializationException {
-
- super.initialize(context);
-
- SentenceModel model;
-
- try {
- SentenceModelResource modelResource = (SentenceModelResource) context
- .getResourceObject(UimaUtil.MODEL_PARAMETER);
-
- model = modelResource.getModel();
- } catch (ResourceAccessException e) {
- throw new ResourceInitializationException(e);
- }
-
- sentenceDetector = new SentenceDetectorME(model);
- }
-
- /**
- * Initializes the type system.
- */
- public void typeSystemInit(TypeSystem typeSystem)
- throws AnalysisEngineProcessException {
-
- super.typeSystemInit(typeSystem);
-
- probabilityFeature = AnnotatorUtil.getOptionalFeatureParameter(context,
- sentenceType, UimaUtil.PROBABILITY_FEATURE_PARAMETER,
- CAS.TYPE_NAME_DOUBLE);
- }
-
- @Override
- protected Span[] detectSentences(String text) {
- return sentenceDetector.sentPosDetect(text);
- }
-
- @Override
- protected void postProcessAnnotations(AnnotationFS sentences[]) {
-
- if (probabilityFeature != null) {
- double sentenceProbabilities[] = sentenceDetector.getSentenceProbabilities();
-
- for (int i = 0; i < sentences.length; i++) {
- sentences[i].setDoubleValue(probabilityFeature, sentenceProbabilities[i]);
- }
- }
- }
-
- /**
- * Releases allocated resources.
- */
- public void destroy() {
- // dereference model to allow garbage collection
- sentenceDetector = null;
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreemnets. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.sentdetect;
+
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.util.Span;
+import opennlp.uima.util.AnnotatorUtil;
+import opennlp.uima.util.UimaUtil;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.resource.ResourceAccessException;
+import org.apache.uima.resource.ResourceInitializationException;
+
+/**
+ * OpenNLP Sentence annotator.
+ * <p>
+ * Mandatory parameters
+ * <table border=1>
+ * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
+ * </table>
+ * <p>
+ * Optional parameters
+ * <table border=1>
+ * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ * <tr><td>String</td> <td>opennlp.uima.ContainerType</td> <td>The name of the container type</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.ProbabilityFeature</td> <td>The name of the double
+ * probability feature (not set by default)</td></tr>
+ * </table>
+ */
+public final class SentenceDetector extends AbstractSentenceDetector {
+
+ /**
+ * OpenNLP sentence detector.
+ */
+ private SentenceDetectorME sentenceDetector;
+
+ private Feature probabilityFeature;
+
+ /**
+ * Initializes a new instance.
+ *
+ * Note: Use {@link #initialize(UimaContext) } to initialize
+ * this instance. Not use the constructor.
+ */
+ public SentenceDetector() {
+ // must not be implemented !
+ }
+
+ /**
+ * Initializes the current instance with the given context.
+ *
+ * Note: Do all initialization in this method, do not use the constructor.
+ */
+ public void initialize(UimaContext context)
+ throws ResourceInitializationException {
+
+ super.initialize(context);
+
+ SentenceModel model;
+
+ try {
+ SentenceModelResource modelResource = (SentenceModelResource) context
+ .getResourceObject(UimaUtil.MODEL_PARAMETER);
+
+ model = modelResource.getModel();
+ } catch (ResourceAccessException e) {
+ throw new ResourceInitializationException(e);
+ }
+
+ sentenceDetector = new SentenceDetectorME(model);
+ }
+
+ /**
+ * Initializes the type system.
+ */
+ public void typeSystemInit(TypeSystem typeSystem)
+ throws AnalysisEngineProcessException {
+
+ super.typeSystemInit(typeSystem);
+
+ probabilityFeature = AnnotatorUtil.getOptionalFeatureParameter(context,
+ sentenceType, UimaUtil.PROBABILITY_FEATURE_PARAMETER,
+ CAS.TYPE_NAME_DOUBLE);
+ }
+
+ @Override
+ protected Span[] detectSentences(String text) {
+ return sentenceDetector.sentPosDetect(text);
+ }
+
+ @Override
+ protected void postProcessAnnotations(AnnotationFS sentences[]) {
+
+ if (probabilityFeature != null) {
+ double sentenceProbabilities[] = sentenceDetector.getSentenceProbabilities();
+
+ for (int i = 0; i < sentences.length; i++) {
+ sentences[i].setDoubleValue(probabilityFeature, sentenceProbabilities[i]);
+ }
+ }
+ }
+
+ /**
+ * Releases allocated resources.
+ */
+ public void destroy() {
+ // dereference model to allow garbage collection
+ sentenceDetector = null;
+ }
+}
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java?rev=1198880&r1=1198879&r2=1198880&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java (original)
+++ incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java Mon Nov 7 19:07:33 2011
@@ -1,163 +1,163 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreemnets. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.sentdetect;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import opennlp.maxent.GIS;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.sentdetect.SentenceSample;
-import opennlp.tools.util.ObjectStreamUtils;
-import opennlp.tools.util.Span;
-import opennlp.uima.util.CasConsumerUtil;
-import opennlp.uima.util.OpennlpUtil;
-import opennlp.uima.util.UimaUtil;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.FSIndex;
-import org.apache.uima.cas.Type;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.collection.CasConsumer_ImplBase;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceProcessException;
-import org.apache.uima.util.Level;
-import org.apache.uima.util.Logger;
-import org.apache.uima.util.ProcessTrace;
-
-/**
- * OpenNLP SentenceDetector trainer.
- * <p>
- * Mandatory parameters
- * <table border=1>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- * </table>
- */
-public final class SentenceDetectorTrainer extends CasConsumer_ImplBase {
-
- private List<SentenceSample> sentenceSamples = new ArrayList<SentenceSample>();
-
- private Type mSentenceType;
-
- private String mModelName;
-
- private String language = "en";
-
- private Logger mLogger;
-
- private UimaContext mContext;
-
- /**
- * Initializes the current instance.
- */
- public void initialize() throws ResourceInitializationException {
-
- super.initialize();
-
- mContext = getUimaContext();
-
- mLogger = mContext.getLogger();
-
- if (mLogger.isLoggable(Level.INFO)) {
- mLogger.log(Level.INFO, "Initializing the OpenNLP SentenceDetector " +
- "trainer.");
- }
-
- mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
- UimaUtil.MODEL_PARAMETER);
-
- language = CasConsumerUtil.getRequiredStringParameter(mContext,
- UimaUtil.LANGUAGE_PARAMETER);
- }
-
- /**
- * Initializes the current instance with the given type system.
- */
- public void typeSystemInit(TypeSystem typeSystem)
- throws ResourceInitializationException {
-
- String sentenceTypeName =
- CasConsumerUtil.getRequiredStringParameter(mContext,
- UimaUtil.SENTENCE_TYPE_PARAMETER);
-
- mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
- }
-
- /**
- * Process the given CAS object.
- */
- public void processCas(CAS cas) {
-
- FSIndex<AnnotationFS> sentenceIndex = cas.getAnnotationIndex(mSentenceType);
-
- Span[] sentSpans = new Span[sentenceIndex.size()];
-
- int i = 0;
- Iterator<AnnotationFS> sentenceIterator = sentenceIndex.iterator();
- while (sentenceIterator.hasNext()) {
- AnnotationFS sentenceAnnotation = (AnnotationFS) sentenceIterator.next();
-
- sentSpans[i++] = new Span(sentenceAnnotation.getBegin(), sentenceAnnotation.getEnd());
- }
-
- sentenceSamples.add(new SentenceSample(cas.getDocumentText(), sentSpans));
- }
-
- /**
- * Called if the processing is finished, this method
- * does the training.
- */
- public void collectionProcessComplete(ProcessTrace trace)
- throws ResourceProcessException, IOException {
- GIS.PRINT_MESSAGES = false;
-
- SentenceModel sentenceModel = SentenceDetectorME.train(language,
- ObjectStreamUtils.createObjectStream(sentenceSamples), true, null);
-
- // dereference to allow garbage collection
- sentenceSamples = null;
-
- File modelFile = new File(getUimaContextAdmin().getResourceManager()
- .getDataPath() + File.separatorChar + mModelName);
-
- OpennlpUtil.serialize(sentenceModel,modelFile);
- }
-
- /**
- * The trainer is not stateless.
- */
- public boolean isStateless() {
- return false;
- }
-
- /**
- * Releases allocated resources.
- */
- public void destroy() {
- // dereference to allow garbage collection
- sentenceSamples = null;
- }
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreemnets. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.sentdetect;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import opennlp.maxent.GIS;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.sentdetect.SentenceSample;
+import opennlp.tools.util.ObjectStreamUtils;
+import opennlp.tools.util.Span;
+import opennlp.uima.util.CasConsumerUtil;
+import opennlp.uima.util.OpennlpUtil;
+import opennlp.uima.util.UimaUtil;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIndex;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.collection.CasConsumer_ImplBase;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.ResourceProcessException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.ProcessTrace;
+
+/**
+ * OpenNLP SentenceDetector trainer.
+ * <p>
+ * Mandatory parameters
+ * <table border=1>
+ * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
+ * </table>
+ */
+public final class SentenceDetectorTrainer extends CasConsumer_ImplBase {
+
+ private List<SentenceSample> sentenceSamples = new ArrayList<SentenceSample>();
+
+ private Type mSentenceType;
+
+ private String mModelName;
+
+ private String language = "en";
+
+ private Logger mLogger;
+
+ private UimaContext mContext;
+
+ /**
+ * Initializes the current instance.
+ */
+ public void initialize() throws ResourceInitializationException {
+
+ super.initialize();
+
+ mContext = getUimaContext();
+
+ mLogger = mContext.getLogger();
+
+ if (mLogger.isLoggable(Level.INFO)) {
+ mLogger.log(Level.INFO, "Initializing the OpenNLP SentenceDetector " +
+ "trainer.");
+ }
+
+ mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
+ UimaUtil.MODEL_PARAMETER);
+
+ language = CasConsumerUtil.getRequiredStringParameter(mContext,
+ UimaUtil.LANGUAGE_PARAMETER);
+ }
+
+ /**
+ * Initializes the current instance with the given type system.
+ */
+ public void typeSystemInit(TypeSystem typeSystem)
+ throws ResourceInitializationException {
+
+ String sentenceTypeName =
+ CasConsumerUtil.getRequiredStringParameter(mContext,
+ UimaUtil.SENTENCE_TYPE_PARAMETER);
+
+ mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
+ }
+
+ /**
+ * Process the given CAS object.
+ */
+ public void processCas(CAS cas) {
+
+ FSIndex<AnnotationFS> sentenceIndex = cas.getAnnotationIndex(mSentenceType);
+
+ Span[] sentSpans = new Span[sentenceIndex.size()];
+
+ int i = 0;
+ Iterator<AnnotationFS> sentenceIterator = sentenceIndex.iterator();
+ while (sentenceIterator.hasNext()) {
+ AnnotationFS sentenceAnnotation = (AnnotationFS) sentenceIterator.next();
+
+ sentSpans[i++] = new Span(sentenceAnnotation.getBegin(), sentenceAnnotation.getEnd());
+ }
+
+ sentenceSamples.add(new SentenceSample(cas.getDocumentText(), sentSpans));
+ }
+
+ /**
+ * Called if the processing is finished, this method
+ * does the training.
+ */
+ public void collectionProcessComplete(ProcessTrace trace)
+ throws ResourceProcessException, IOException {
+ GIS.PRINT_MESSAGES = false;
+
+ SentenceModel sentenceModel = SentenceDetectorME.train(language,
+ ObjectStreamUtils.createObjectStream(sentenceSamples), true, null);
+
+ // dereference to allow garbage collection
+ sentenceSamples = null;
+
+ File modelFile = new File(getUimaContextAdmin().getResourceManager()
+ .getDataPath() + File.separatorChar + mModelName);
+
+ OpennlpUtil.serialize(sentenceModel,modelFile);
+ }
+
+ /**
+ * The trainer is not stateless.
+ */
+ public boolean isStateless() {
+ return false;
+ }
+
+ /**
+ * Releases allocated resources.
+ */
+ public void destroy() {
+ // dereference to allow garbage collection
+ sentenceSamples = null;
+ }
}
\ No newline at end of file
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResource.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/package.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java?rev=1198880&r1=1198879&r2=1198880&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java (original)
+++ incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java Mon Nov 7 19:07:33 2011
@@ -1,59 +1,59 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreemnets. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.tokenize;
-
-import opennlp.tools.util.Span;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.text.AnnotationFS;
-
-/**
- * OpenNLP Simple Tokenizer annotator.
- * <p>
- * Mandatory parameters
- * <table border=1>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
- * </table>
- */
-public final class SimpleTokenizer extends AbstractTokenizer {
-
- /**
- * The OpenNLP simple tokenizer.
- */
- private opennlp.tools.tokenize.SimpleTokenizer tokenizer =
- opennlp.tools.tokenize.SimpleTokenizer.INSTANCE;
-
- /**
- * Initializes the current instance.
- *
- * Note: Use {@link #initialize(UimaContext) } to initialize
- * this instance. Not use the constructor.
- */
- public SimpleTokenizer() {
- super("OpenNLP Simple Tokenizer");
- // must not be implemented !
- }
-
- @Override
- protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
- return tokenizer.tokenizePos(sentence.getCoveredText());
- }
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreemnets. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.tokenize;
+
+import opennlp.tools.util.Span;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.text.AnnotationFS;
+
+/**
+ * OpenNLP Simple Tokenizer annotator.
+ * <p>
+ * Mandatory parameters
+ * <table border=1>
+ * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
+ * </table>
+ */
+public final class SimpleTokenizer extends AbstractTokenizer {
+
+ /**
+ * The OpenNLP simple tokenizer.
+ */
+ private opennlp.tools.tokenize.SimpleTokenizer tokenizer =
+ opennlp.tools.tokenize.SimpleTokenizer.INSTANCE;
+
+ /**
+ * Initializes the current instance.
+ *
+ * Note: Use {@link #initialize(UimaContext) } to initialize
+ * this instance. Not use the constructor.
+ */
+ public SimpleTokenizer() {
+ super("OpenNLP Simple Tokenizer");
+ // must not be implemented !
+ }
+
+ @Override
+ protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
+ return tokenizer.tokenizePos(sentence.getCoveredText());
+ }
}
\ No newline at end of file
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
URL: http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java?rev=1198880&r1=1198879&r2=1198880&view=diff
==============================================================================
--- incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java (original)
+++ incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java Mon Nov 7 19:07:33 2011
@@ -1,139 +1,139 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreemnets. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.uima.tokenize;
-
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.tokenize.TokenizerModel;
-import opennlp.tools.util.Span;
-import opennlp.uima.util.AnnotatorUtil;
-import opennlp.uima.util.UimaUtil;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.TypeSystem;
-import org.apache.uima.cas.text.AnnotationFS;
-import org.apache.uima.resource.ResourceAccessException;
-import org.apache.uima.resource.ResourceInitializationException;
-
-/**
- * OpenNLP Tokenizer annotator.
- * <p>
- * Mandatory parameters
- * <table border=1>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
- * <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
- * </table>
- * <p>
- * Optional parameters
- * <table border=1>
- * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.ProbabilityFeature</td> <td>The name of the double
- * probability feature (not set by default)</td>
- * </table>
- * @see {@link TokenizerME}
- */
-public final class Tokenizer extends AbstractTokenizer {
-
- /**
- * The OpenNLP tokenizer.
- */
- private TokenizerME tokenizer;
-
- private Feature probabilityFeature;
-
- /**
- * Initializes a new instance.
- *
- * Note: Use {@link #initialize(UimaContext) } to initialize
- * this instance. Not use the constructor.
- */
- public Tokenizer() {
- super("OpenNLP Tokenizer");
-
- // must not be implemented !
- }
-
- /**
- * Initializes the current instance with the given context.
- *
- * Note: Do all initialization in this method, do not use the constructor.
- */
- public void initialize(UimaContext context)
- throws ResourceInitializationException {
-
- super.initialize(context);
-
- TokenizerModel model;
-
- try {
- TokenizerModelResource modelResource = (TokenizerModelResource) context
- .getResourceObject(UimaUtil.MODEL_PARAMETER);
-
- model = modelResource.getModel();
- } catch (ResourceAccessException e) {
- throw new ResourceInitializationException(e);
- }
-
- tokenizer = new TokenizerME(model);
- }
-
- /**
- * Initializes the type system.
- */
- public void typeSystemInit(TypeSystem typeSystem)
- throws AnalysisEngineProcessException {
-
- super.typeSystemInit(typeSystem);
-
- probabilityFeature = AnnotatorUtil
- .getOptionalFeatureParameter(context, tokenType,
- UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE);
- }
-
-
- @Override
- protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
- return tokenizer.tokenizePos(sentence.getCoveredText());
- }
-
- @Override
- protected void postProcessAnnotations(Span[] tokens,
- AnnotationFS[] tokenAnnotations) {
- // if interest
- if (probabilityFeature != null) {
- double tokenProbabilties[] = tokenizer.getTokenProbabilities();
-
- for (int i = 0; i < tokenAnnotations.length; i++) {
- tokenAnnotations[i].setDoubleValue(probabilityFeature,
- tokenProbabilties[i]);
- }
- }
- }
-
- /**
- * Releases allocated resources.
- */
- public void destroy() {
- // dereference model to allow garbage collection
- tokenizer = null;
- }
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreemnets. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.uima.tokenize;
+
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.Span;
+import opennlp.uima.util.AnnotatorUtil;
+import opennlp.uima.util.UimaUtil;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.resource.ResourceAccessException;
+import org.apache.uima.resource.ResourceInitializationException;
+
+/**
+ * OpenNLP Tokenizer annotator.
+ * <p>
+ * Mandatory parameters
+ * <table border=1>
+ * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ * <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
+ * <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
+ * </table>
+ * <p>
+ * Optional parameters
+ * <table border=1>
+ * <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
+ * <tr><td>String</td> <td>opennlp.uima.ProbabilityFeature</td> <td>The name of the double
+ * probability feature (not set by default)</td>
+ * </table>
+ * @see {@link TokenizerME}
+ */
+public final class Tokenizer extends AbstractTokenizer {
+
+ /**
+ * The OpenNLP tokenizer.
+ */
+ private TokenizerME tokenizer;
+
+ private Feature probabilityFeature;
+
+ /**
+ * Initializes a new instance.
+ *
+ * Note: Use {@link #initialize(UimaContext) } to initialize
+ * this instance. Not use the constructor.
+ */
+ public Tokenizer() {
+ super("OpenNLP Tokenizer");
+
+ // must not be implemented !
+ }
+
+ /**
+ * Initializes the current instance with the given context.
+ *
+ * Note: Do all initialization in this method, do not use the constructor.
+ */
+ public void initialize(UimaContext context)
+ throws ResourceInitializationException {
+
+ super.initialize(context);
+
+ TokenizerModel model;
+
+ try {
+ TokenizerModelResource modelResource = (TokenizerModelResource) context
+ .getResourceObject(UimaUtil.MODEL_PARAMETER);
+
+ model = modelResource.getModel();
+ } catch (ResourceAccessException e) {
+ throw new ResourceInitializationException(e);
+ }
+
+ tokenizer = new TokenizerME(model);
+ }
+
+ /**
+ * Initializes the type system.
+ */
+ public void typeSystemInit(TypeSystem typeSystem)
+ throws AnalysisEngineProcessException {
+
+ super.typeSystemInit(typeSystem);
+
+ probabilityFeature = AnnotatorUtil
+ .getOptionalFeatureParameter(context, tokenType,
+ UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE);
+ }
+
+
+ @Override
+ protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
+ return tokenizer.tokenizePos(sentence.getCoveredText());
+ }
+
+ @Override
+ protected void postProcessAnnotations(Span[] tokens,
+ AnnotationFS[] tokenAnnotations) {
+ // if interest
+ if (probabilityFeature != null) {
+ double tokenProbabilties[] = tokenizer.getTokenProbabilities();
+
+ for (int i = 0; i < tokenAnnotations.length; i++) {
+ tokenAnnotations[i].setDoubleValue(probabilityFeature,
+ tokenProbabilties[i]);
+ }
+ }
+ }
+
+ /**
+ * Releases allocated resources.
+ */
+ public void destroy() {
+ // dereference model to allow garbage collection
+ tokenizer = null;
+ }
}
\ No newline at end of file
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerModelResource.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerModelResourceImpl.java
------------------------------------------------------------------------------
svn:eol-style = native