You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/05/02 14:34:33 UTC
svn commit: r1591889 [13/14] - in /opennlp/trunk: opennlp-tools/lang/ml/
opennlp-tools/src/main/java/opennlp/tools/chunker/
opennlp-tools/src/main/java/opennlp/tools/cmdline/
opennlp-tools/src/main/java/opennlp/tools/cmdline/chunker/
opennlp-tools/src/...
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/NameFinderTrainer.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.namefind;
@@ -74,7 +74,7 @@ import org.apache.uima.util.ProcessTrace
* <tr><td>String</td> <td>opennlp.uima.TokenType</td> <td>The full name of the token type</td></tr>
* <tr><td>String</td> <td>opennlp.uima.NameType</td> <td>The full name of the name type</td></tr>
* </table>
- *
+ *
* Optional parameters
* <table border=1>
* <caption></caption>
@@ -90,95 +90,95 @@ import org.apache.uima.util.ProcessTrace
* <p>
*/
public final class NameFinderTrainer extends CasConsumer_ImplBase {
-
+
private static final String FEATURE_GENERATOR_DEFINITION_FILE_PARAMETER = "opennlp.uima.FeatureGeneratorFile";
private static final String FEATURE_GENERATOR_RESOURCES_PARAMETER = "opennlp.uima.FeatureGeneratorResources";
-
+
private Logger logger;
-
+
private String modelPath;
-
+
private byte featureGeneratorDefinition[];
-
+
private File featureGeneratorResourceDir;
-
+
private String additionalTrainingDataFile;
-
+
private String additionalTrainingDataEncoding;
-
+
private File sampleTraceFile = null;
-
+
private String sampleTraceFileEncoding = null;
-
+
private Type sentenceType;
private Type tokenType;
private Type nameType;
-
+
private String language;
-
+
// TODO: Keeping all events in memory limits the size of the training corpus
// Possible solutions:
// - Write all events to disk
// - Directly start indexing with a blocking sample stream, the indexer will then write everything
// to disk or could store the events much more space efficient in memory
-
+
private List<NameSample> nameFinderSamples = new ArrayList<NameSample>();
private TrainingParameters trainingParams;
-
+
/**
* Initializes the current instance.
*/
public void initialize() throws ResourceInitializationException {
-
+
super.initialize();
-
+
logger = getUimaContext().getLogger();
-
+
if (logger.isLoggable(Level.INFO)) {
logger.log(Level.INFO, "Initializing the OpenNLP Name Trainer.");
- }
-
+ }
+
modelPath = CasConsumerUtil.getRequiredStringParameter(getUimaContext(),
UimaUtil.MODEL_PARAMETER);
-
+
language = CasConsumerUtil.getRequiredStringParameter(getUimaContext(),
UimaUtil.LANGUAGE_PARAMETER);
-
+
trainingParams = OpennlpUtil.loadTrainingParams(CasConsumerUtil.getOptionalStringParameter(
getUimaContext(), UimaUtil.TRAINING_PARAMS_FILE_PARAMETER), true);
String featureGeneratorDefinitionFile = CasConsumerUtil.getOptionalStringParameter(
getUimaContext(), FEATURE_GENERATOR_DEFINITION_FILE_PARAMETER);
-
+
if (featureGeneratorDefinitionFile != null) {
try {
featureGeneratorDefinition = OpennlpUtil.loadBytes(new File(featureGeneratorDefinitionFile));
} catch (IOException e) {
throw new ResourceInitializationException(e);
}
-
+
String featureGeneratorResourcesDirName = CasConsumerUtil.getOptionalStringParameter(
getUimaContext(), FEATURE_GENERATOR_RESOURCES_PARAMETER);
-
+
if (featureGeneratorResourcesDirName != null) {
featureGeneratorResourceDir = new File(featureGeneratorResourcesDirName);
}
}
-
+
additionalTrainingDataFile = CasConsumerUtil.getOptionalStringParameter(
getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_FILE);
-
+
// If the additional training data is specified, the encoding must be provided!
if (additionalTrainingDataFile != null) {
additionalTrainingDataEncoding = CasConsumerUtil.getRequiredStringParameter(
getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_ENCODING);
}
-
+
String sampleTraceFileName = CasConsumerUtil.getOptionalStringParameter(
getUimaContext(), "opennlp.uima.SampleTraceFile");
-
+
if (sampleTraceFileName != null) {
sampleTraceFile = new File(getUimaContextAdmin().getResourceManager()
.getDataPath() + File.separatorChar + sampleTraceFileName);
@@ -193,7 +193,7 @@ public final class NameFinderTrainer ext
public void typeSystemInit(TypeSystem typeSystem)
throws ResourceInitializationException {
- String sentenceTypeName =
+ String sentenceTypeName =
CasConsumerUtil.getRequiredStringParameter(getUimaContext(),
UimaUtil.SENTENCE_TYPE_PARAMETER);
@@ -206,24 +206,24 @@ public final class NameFinderTrainer ext
String nameTypeName = CasConsumerUtil.getRequiredStringParameter(getUimaContext(),
NameFinder.NAME_TYPE_PARAMETER);
-
+
nameType = CasConsumerUtil.getType(typeSystem, nameTypeName);
}
/**
* Creates a {@link List} from an {@link Iterator}.
- *
+ *
* @param <T>
* @param it
* @return
*/
private static <T> List<T> iteratorToList(Iterator<T> it) {
List<T> list = new LinkedList<T>();
-
+
while (it.hasNext()) {
list.add(it.next());
}
-
+
return list;
}
@@ -243,13 +243,13 @@ public final class NameFinderTrainer ext
return true;
}
-
+
/**
* Creates the name spans out of a list of token annotations and a list of entity annotations.
* <p>
* The name spans for the name finder use a token index and not a character index, which
* is what the entity annotations use.
- *
+ *
* @param tokenList
* @param entityAnnotations
* @return
@@ -296,7 +296,7 @@ public final class NameFinderTrainer ext
return nameList.toArray(new Span[nameList.size()]);
}
-
+
/**
* Process the given CAS object.
*/
@@ -305,9 +305,9 @@ public final class NameFinderTrainer ext
*/
public void processCas(CAS cas) {
FSIndex<AnnotationFS> sentenceIndex = cas.getAnnotationIndex(sentenceType);
-
+
boolean isClearAdaptiveData = true;
-
+
for (AnnotationFS sentenceAnnotation : sentenceIndex) {
ContainingConstraint sentenceContainingConstraint = new ContainingConstraint(
sentenceAnnotation);
@@ -337,7 +337,7 @@ public final class NameFinderTrainer ext
if (trainingSentence.getSentence().length != 0) {
nameFinderSamples.add(trainingSentence);
-
+
if (isClearAdaptiveData) {
isClearAdaptiveData = false;
}
@@ -349,39 +349,39 @@ public final class NameFinderTrainer ext
}
}
}
-
+
/**
* Called if the processing is finished, this method
* does the training.
*/
public void collectionProcessComplete(ProcessTrace trace)
throws ResourceProcessException, IOException {
-
+
if (logger.isLoggable(Level.INFO)) {
- logger.log(Level.INFO, "Collected " + nameFinderSamples.size() +
+ logger.log(Level.INFO, "Collected " + nameFinderSamples.size() +
" name samples.");
}
-
+
GIS.PRINT_MESSAGES = false;
-
- // create training stream ...
+
+ // create training stream ...
ObjectStream<NameSample> samples = ObjectStreamUtils.createObjectStream(nameFinderSamples);
-
+
InputStream additionalTrainingDataIn = null;
Writer samplesOut = null;
TokenNameFinderModel nameModel;
try {
if (additionalTrainingDataFile != null) {
-
+
if (logger.isLoggable(Level.INFO)) {
logger.log(Level.INFO, "Using additional training data file: " + additionalTrainingDataFile);
}
-
+
additionalTrainingDataIn = new FileInputStream(additionalTrainingDataFile);
-
+
ObjectStream<NameSample> additionalSamples = new NameSampleDataStream(
new PlainTextByLineStream(new InputStreamReader(additionalTrainingDataIn, additionalTrainingDataEncoding)));
-
+
samples = ObjectStreamUtils.createObjectStream(samples, additionalSamples);
}
@@ -389,16 +389,16 @@ public final class NameFinderTrainer ext
samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding);
samples = new SampleTraceStream<NameSample>(samples, samplesOut);
}
-
+
Map<String, Object> resourceMap;
-
+
if (featureGeneratorResourceDir != null) {
resourceMap = TokenNameFinderTrainerTool.loadResources(featureGeneratorResourceDir, null);
}
else {
resourceMap = Collections.emptyMap();
}
-
+
nameModel = NameFinderME.train(language, null,
samples, trainingParams, featureGeneratorDefinition, resourceMap);
}
@@ -406,12 +406,12 @@ public final class NameFinderTrainer ext
if (additionalTrainingDataIn != null) {
additionalTrainingDataIn.close();
}
-
+
if (samplesOut != null) {
samplesOut.close();
}
}
-
+
// dereference to allow garbage collection
nameFinderSamples = null;
@@ -419,19 +419,19 @@ public final class NameFinderTrainer ext
.getDataPath() + File.separatorChar + modelPath);
OpennlpUtil.serialize(nameModel, modelFile);
-
+
if (logger.isLoggable(Level.INFO)) {
logger.log(Level.INFO, "Model was written to: " + modelFile.getAbsolutePath());
}
}
-
+
/**
* The trainer is not stateless.
*/
public boolean isStateless() {
return false;
}
-
+
/**
* Destroys the current instance.
*/
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/TokenNameFinderModelResource.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/TokenNameFinderModelResource.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/TokenNameFinderModelResource.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/TokenNameFinderModelResource.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.namefind;
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/TokenNameFinderModelResourceImpl.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/TokenNameFinderModelResourceImpl.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/TokenNameFinderModelResourceImpl.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/namefind/TokenNameFinderModelResourceImpl.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.namefind;
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/normalizer/Normalizer.java Fri May 2 12:34:23 2014
@@ -46,19 +46,19 @@ import org.apache.uima.util.Logger;
/**
* The Normalizer tries to structure annotations. The structured value
* is then assigned to a field of the annotation.
- *
- * The process depends on the
- *
+ *
+ * The process depends on the
+ *
* string Tokens must be (fuzzy) mapped to categories eg. a month, a day or a
* year (use dictionary) integer, float tokens must be parsed eg. for percentage
* or period boolean tokens must be parsed eg is there any ???
- *
- *
+ *
+ *
* restricted set of outcomes throw error if not matched or silently fail
* unrestricted set of outcomes
*/
public class Normalizer extends CasAnnotator_ImplBase {
-
+
/**
* This set contains all supported range types.
*/
@@ -89,7 +89,7 @@ public class Normalizer extends CasAnnot
/**
* The target type which the text should have. This type must be primitive.
- *
+ *
* It should not be possible to assign something to this feature which is not
* structured. The feature should define allowed values.
*/
@@ -98,10 +98,10 @@ public class Normalizer extends CasAnnot
// private Type mSentenceType;
private StringDictionary mLookupDictionary;
-
+
/**
* Initializes a new instance.
- *
+ *
* Note: Use {@link #initialize(UimaContext) } to initialize this instance. Do not
* use the constructor.
*/
@@ -111,7 +111,7 @@ public class Normalizer extends CasAnnot
/**
* Initializes the current instance with the given context.
- *
+ *
* Note: Do all initialization in this method, do not use the constructor.
*/
public void initialize(UimaContext context)
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/normalizer/NumberUtil.java Fri May 2 12:34:23 2014
@@ -29,7 +29,7 @@ public final class NumberUtil {
/**
* Checks if the language is supported.
- *
+ *
* @param languageCode language code, e.g. "en", "pt"
* @return true if the language is supported
*/
@@ -72,7 +72,7 @@ public final class NumberUtil {
/**
* Does its best to parse the provided number.
- *
+ *
* @param number number to parse
* @param languageCode language code, e.g. "en", "pt"
* @return parsed number
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/normalizer/StringDictionary.java Fri May 2 12:34:23 2014
@@ -41,7 +41,7 @@ public class StringDictionary {
/**
* Initializes the current instance.
- *
+ *
* @param in
* @throws IOException
* @throws InvalidFormatException
@@ -70,7 +70,7 @@ public class StringDictionary {
/**
* Writes the ngram instance to the given {@link OutputStream}.
- *
+ *
* @param out
* @throws IOException
* if an I/O error occurs during writing
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/parser/Parser.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.parser;
@@ -68,28 +68,28 @@ import org.apache.uima.util.Logger;
* </table>
*/
public class Parser extends CasAnnotator_ImplBase {
-
+
private static class ParseConverter {
private Map<Integer, Integer> mIndexMap = new HashMap<Integer, Integer>();
-
+
private Parse mParseForTagger;
-
+
private final String mSentence;
-
+
/**
* Initializes a new instance.
- *
+ *
* @param sentence
* @param tokens
*/
public ParseConverter(String sentence, Span tokens[]) {
-
+
mSentence = sentence;
-
+
StringBuilder sentenceStringBuilder = new StringBuilder();
-
+
String tokenList[] = new String[tokens.length];
-
+
for (int i = 0; i < tokens.length; i++) {
String tokenString = tokens[i].getCoveredText(sentence).toString();
String escapedToken = escape(tokenString);
@@ -107,18 +107,18 @@ public class Parser extends CasAnnotator
sentenceStringBuilder.append(' ');
}
-
+
// remove last space
if (sentenceStringBuilder.length() > 0)
sentenceStringBuilder.setLength(sentenceStringBuilder.length() - 1);
-
+
String tokenizedSentence = sentenceStringBuilder.toString();
-
- mParseForTagger = new Parse(tokenizedSentence,
+
+ mParseForTagger = new Parse(tokenizedSentence,
new Span(0, tokenizedSentence.length()), "INC", 1, null);
-
+
int start = 0;
-
+
for (String token : tokenList) {
mParseForTagger.insert(new Parse(tokenizedSentence, new Span(start,
start + token.length()),
@@ -127,23 +127,23 @@ public class Parser extends CasAnnotator
start += token.length() + 1;
}
}
-
+
private static String escape(String text) {
return text;
}
-
+
/**
* Creates the parse for the tagger.
- *
+ *
* @return the parse which can be passed to the tagger
*/
Parse getParseForTagger() {
return mParseForTagger;
}
-
+
/**
* Converts the parse from the tagger back.
- *
+ *
* @param parseFromTagger
* @return the final parse
*/
@@ -164,20 +164,20 @@ public class Parser extends CasAnnotator
return transformedParse;
}
}
-
+
public static final String PARSE_TYPE_PARAMETER = "opennlp.uima.ParseType";
- public static final String TYPE_FEATURE_PARAMETER =
+ public static final String TYPE_FEATURE_PARAMETER =
"opennlp.uima.TypeFeature";
-
- public static final String CHILDREN_FEATURE_PARAMETER =
+
+ public static final String CHILDREN_FEATURE_PARAMETER =
"opennlp.uima.ChildrenFeature";
-
+
public static final String PROBABILITY_FEATURE_PARAMETER =
"opennlp.uima.ProbabilityFeature";
-
+
protected UimaContext context;
-
+
protected Logger mLogger;
private Type mSentenceType;
@@ -189,11 +189,11 @@ public class Parser extends CasAnnotator
private Type mParseType;
private Feature mTypeFeature;
-
+
private Feature childrenFeature;
private Feature probabilityFeature;
-
+
/**
* Initializes the current instance with the given context.
*/
@@ -223,7 +223,7 @@ public class Parser extends CasAnnotator
mParser = ParserFactory.create(model);
}
-
+
/**
* Initializes the type system.
*/
@@ -241,14 +241,14 @@ public class Parser extends CasAnnotator
mTypeFeature = AnnotatorUtil.getRequiredFeatureParameter(context,
mParseType, TYPE_FEATURE_PARAMETER, CAS.TYPE_NAME_STRING);
-
+
childrenFeature = AnnotatorUtil.getRequiredFeatureParameter(context,
mParseType, CHILDREN_FEATURE_PARAMETER, CAS.TYPE_NAME_FS_ARRAY);
-
+
probabilityFeature = AnnotatorUtil.getOptionalFeatureParameter(context,
mParseType, PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE);
}
-
+
/**
* Performs parsing on the given {@link CAS} object.
*/
@@ -259,77 +259,77 @@ public class Parser extends CasAnnotator
process(cas, sentence);
}
}
-
+
protected void process(CAS cas, AnnotationFS sentenceAnnotation) {
FSIndex<AnnotationFS> allTokens = cas.getAnnotationIndex(mTokenType);
-
- ContainingConstraint containingConstraint =
+
+ ContainingConstraint containingConstraint =
new ContainingConstraint(sentenceAnnotation);
-
+
String sentence = sentenceAnnotation.getCoveredText();
Iterator<AnnotationFS> containingTokens = cas.createFilteredIterator(
allTokens.iterator(), containingConstraint);
-
+
List<Span> tokenSpans = new LinkedList<Span>();
-
+
while(containingTokens.hasNext()) {
AnnotationFS token = (AnnotationFS) containingTokens.next();
- tokenSpans.add(new Span(token.getBegin() - sentenceAnnotation.getBegin(),
+ tokenSpans.add(new Span(token.getBegin() - sentenceAnnotation.getBegin(),
token.getEnd() - sentenceAnnotation.getBegin()));
}
-
- ParseConverter converter = new ParseConverter(sentence,(Span[])
+
+ ParseConverter converter = new ParseConverter(sentence,(Span[])
tokenSpans.toArray(new Span[tokenSpans.size()]));
-
+
Parse unparsedTree = converter.getParseForTagger();
-
+
if (unparsedTree.getChildCount() > 0) {
-
+
Parse parse = mParser.parse(unparsedTree);
-
+
// TODO: We need a strategy to handle the case that a full
// parse could not be found. What to do in this case?
-
+
parse = converter.transformParseFromTagger(parse);
-
+
if (mLogger.isLoggable(Level.INFO)) {
StringBuffer parseString = new StringBuffer();
parse.show(parseString);
-
+
mLogger.log(Level.INFO, parseString.toString());
}
-
+
createAnnotation(cas, sentenceAnnotation.getBegin(), parse);
}
}
-
+
protected AnnotationFS createAnnotation(CAS cas, int offset, Parse parse) {
-
+
Parse parseChildren[] = parse.getChildren();
AnnotationFS parseChildAnnotations[] = new AnnotationFS[parseChildren.length];
-
+
// do this for all children
for (int i = 0; i < parseChildren.length; i++) {
parseChildAnnotations[i] = createAnnotation(cas, offset, parseChildren[i]);
}
-
- AnnotationFS parseAnnotation = cas.createAnnotation(mParseType, offset +
+
+ AnnotationFS parseAnnotation = cas.createAnnotation(mParseType, offset +
parse.getSpan().getStart(), offset + parse.getSpan().getEnd());
-
+
parseAnnotation.setStringValue(mTypeFeature, parse.getType());
-
+
if (probabilityFeature != null) {
parseAnnotation.setDoubleValue(probabilityFeature, parse.getProb());
}
-
+
ArrayFS childrenArray = cas.createArrayFS(parseChildAnnotations.length);
childrenArray.copyFromArray(parseChildAnnotations, 0, 0, parseChildAnnotations.length);
parseAnnotation.setFeatureValue(childrenFeature, childrenArray);
-
+
cas.getIndexRepository().addFS(parseAnnotation);
-
+
return parseAnnotation;
}
/**
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResource.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResource.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResource.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResource.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.postag;
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResourceImpl.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResourceImpl.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResourceImpl.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSModelResourceImpl.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.postag;
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTagger.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.postag;
@@ -60,7 +60,7 @@ import org.apache.uima.util.Logger;
* <caption></caption>
* <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
* <tr><td>String</td> <td>opennlp.uima.ProbabilityFeature</td> <td>The name of the double probability feature (not set by default)</td></tr>
- * <tr><td>Integer</td> <td>opennlp.uima.BeamSize</td>
+ * <tr><td>Integer</td> <td>opennlp.uima.BeamSize</td>
* <tr><td>String</td> <td>opennlp.uima.DictionaryName</td> <td>The name of the dictionary file</td></tr>
* </table>
*/
@@ -82,7 +82,7 @@ public final class POSTagger extends Cas
/**
* Initializes a new instance.
- *
+ *
* Note: Use {@link #initialize(UimaContext) } to initialize this instance. Do not use the
* constructor.
*/
@@ -92,7 +92,7 @@ public final class POSTagger extends Cas
/**
* Initializes the current instance with the given context.
- *
+ *
* Note: Do all initialization in this method, do not use the constructor.
*/
@Override
@@ -162,7 +162,7 @@ public final class POSTagger extends Cas
this.sentenceType, this.tokenType);
for (AnnotationIteratorPair annotationIteratorPair : comboIterator) {
-
+
final List<AnnotationFS> sentenceTokenAnnotationList = new LinkedList<AnnotationFS>();
final List<String> sentenceTokenList = new LinkedList<String>();
@@ -216,7 +216,7 @@ public final class POSTagger extends Cas
// delete last whitespace
if (sentenceWithPos.length() > 1) // not 0 because it contains already the " char
sentenceWithPos.setLength(sentenceWithPos.length() - 1);
-
+
sentenceWithPos.append("\"");
this.logger.log(Level.FINER, sentenceWithPos.toString());
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/postag/POSTaggerTrainer.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.postag;
@@ -71,7 +71,7 @@ import org.apache.uima.util.ProcessTrace
public class POSTaggerTrainer extends CasConsumer_ImplBase {
public static final String TAG_DICTIONARY_NAME = "opennlp.uima.TagDictionaryName";
-
+
private UimaContext mContext;
private Type mSentenceType;
@@ -81,37 +81,37 @@ public class POSTaggerTrainer extends Ca
private String mModelName;
private Feature mPOSFeature;
-
+
private Logger mLogger;
-
+
private List<POSSample> mPOSSamples = new ArrayList<POSSample>();
-
+
private String language;
-
+
private POSDictionary tagDictionary;
-
+
/**
* Initializes the current instance.
*/
public void initialize() throws ResourceInitializationException {
-
+
super.initialize();
-
+
mContext = getUimaContext();
-
+
mLogger = mContext.getLogger();
-
+
if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Initializing the OpenNLP " +
"POSTagger trainer.");
- }
-
+ }
+
mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.MODEL_PARAMETER);
-
+
language = CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.LANGUAGE_PARAMETER);
-
+
String tagDictionaryName = CasConsumerUtil.getOptionalStringParameter(mContext,
TAG_DICTIONARY_NAME);
@@ -132,16 +132,16 @@ public class POSTaggerTrainer extends Ca
}
}
}
- }
-
+ }
+
/**
* Initialize the current instance with the given type system.
*/
- public void typeSystemInit(TypeSystem typeSystem)
+ public void typeSystemInit(TypeSystem typeSystem)
throws ResourceInitializationException {
String sentenceTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.SENTENCE_TYPE_PARAMETER);
-
+
if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, UimaUtil.SENTENCE_TYPE_PARAMETER + ": " +
sentenceTypeName);
@@ -151,15 +151,15 @@ public class POSTaggerTrainer extends Ca
String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.TOKEN_TYPE_PARAMETER);
-
+
mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
-
+
String posFeatureName = CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.POS_FEATURE_PARAMETER);
-
+
mPOSFeature = mTokenType.getFeatureByBaseName(posFeatureName);
}
-
+
/**
* Process the given CAS object.
*/
@@ -171,62 +171,62 @@ public class POSTaggerTrainer extends Ca
process(cas, sentence);
}
}
-
+
private void process(CAS tcas, AnnotationFS sentence) {
-
+
FSIndex<AnnotationFS> allTokens = tcas.getAnnotationIndex(mTokenType);
- ContainingConstraint containingConstraint =
+ ContainingConstraint containingConstraint =
new ContainingConstraint(sentence);
-
+
List<String> tokens = new ArrayList<String>();
List<String> tags = new ArrayList<String>();
-
+
Iterator<AnnotationFS> containingTokens = tcas.createFilteredIterator(
allTokens.iterator(), containingConstraint);
-
+
while (containingTokens.hasNext()) {
-
+
AnnotationFS tokenAnnotation = (AnnotationFS) containingTokens.next();
-
+
String tag = tokenAnnotation.getFeatureValueAsString(mPOSFeature);
-
+
tokens.add(tokenAnnotation.getCoveredText().trim());
tags.add(tag);
}
-
+
mPOSSamples.add(new POSSample(tokens, tags));
}
-
+
/**
* Called if the processing is finished, this method
* does the training.
*/
- public void collectionProcessComplete(ProcessTrace trace)
+ public void collectionProcessComplete(ProcessTrace trace)
throws ResourceProcessException, IOException {
-
+
GIS.PRINT_MESSAGES = false;
- POSModel posTaggerModel = POSTaggerME.train(language,
+ POSModel posTaggerModel = POSTaggerME.train(language,
ObjectStreamUtils.createObjectStream(mPOSSamples),
ModelType.MAXENT, tagDictionary, null, 100, 5);
-
+
// dereference to allow garbage collection
mPOSSamples = null;
-
+
File modelFile = new File(getUimaContextAdmin().getResourceManager()
.getDataPath() + File.separatorChar + mModelName);
OpennlpUtil.serialize(posTaggerModel, modelFile);
}
-
+
/**
* The trainer is not stateless.
*/
public boolean isStateless() {
return false;
}
-
+
/**
* Releases allocated resources.
*/
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/AbstractSentenceDetector.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.sentdetect;
@@ -43,11 +43,11 @@ public abstract class AbstractSentenceDe
protected Logger logger;
protected Type containerType;
-
+
protected Type sentenceType;
private Boolean isRemoveExistingAnnotations;
-
+
@Override
public void initialize(UimaContext context)
throws ResourceInitializationException {
@@ -68,7 +68,7 @@ public abstract class AbstractSentenceDe
isRemoveExistingAnnotations = false;
}
}
-
+
@Override
public void typeSystemInit(TypeSystem typeSystem)
throws AnalysisEngineProcessException {
@@ -84,12 +84,12 @@ public abstract class AbstractSentenceDe
sentenceType = AnnotatorUtil.getRequiredTypeParameter(context, typeSystem,
UimaUtil.SENTENCE_TYPE_PARAMETER);
}
-
+
protected abstract Span[] detectSentences(String text);
-
+
protected void postProcessAnnotations(AnnotationFS sentences[]) {
}
-
+
@Override
public void process(CAS cas) throws AnalysisEngineProcessException {
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.sentdetect;
@@ -42,7 +42,7 @@ import org.apache.uima.resource.Resource
* <tr><td>String</td> <td>opennlp.uima.ModelName</td> <td>The name of the model file</td></tr>
* <tr><td>String</td> <td>opennlp.uima.SentenceType</td> <td>The full name of the sentence type</td></tr>
* </table>
- * <p>
+ * <p>
* Optional parameters
* <table border=1>
* <caption></caption>
@@ -60,20 +60,20 @@ public final class SentenceDetector exte
private SentenceDetectorME sentenceDetector;
private Feature probabilityFeature;
-
+
/**
* Initializes a new instance.
*
- * Note: Use {@link #initialize(UimaContext) } to initialize
+ * Note: Use {@link #initialize(UimaContext) } to initialize
* this instance. Do not use the constructor.
*/
public SentenceDetector() {
// must not be implemented !
}
-
+
/**
* Initializes the current instance with the given context.
- *
+ *
* Note: Do all initialization in this method, do not use the constructor.
*/
public void initialize(UimaContext context)
@@ -112,24 +112,24 @@ public final class SentenceDetector exte
protected Span[] detectSentences(String text) {
return sentenceDetector.sentPosDetect(text);
}
-
+
@Override
protected void postProcessAnnotations(AnnotationFS sentences[]) {
-
+
if (probabilityFeature != null) {
- double sentenceProbabilities[] = sentenceDetector.getSentenceProbabilities();
-
+ double sentenceProbabilities[] = sentenceDetector.getSentenceProbabilities();
+
for (int i = 0; i < sentences.length; i++) {
sentences[i].setDoubleValue(probabilityFeature, sentenceProbabilities[i]);
}
}
}
-
+
/**
* Releases allocated resources.
*/
public void destroy() {
- // dereference model to allow garbage collection
+ // dereference model to allow garbage collection
sentenceDetector = null;
}
}
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetectorTrainer.java Fri May 2 12:34:23 2014
@@ -66,71 +66,71 @@ import org.apache.uima.util.ProcessTrace
* </table>
*/
public final class SentenceDetectorTrainer extends CasConsumer_ImplBase {
-
+
private List<SentenceSample> sentenceSamples = new ArrayList<SentenceSample>();
private Type mSentenceType;
private String mModelName;
-
+
private String language = "en";
-
+
private Logger mLogger;
private UimaContext mContext;
-
+
private String eosChars;
private File sampleTraceFile;
private String sampleTraceFileEncoding;
-
+
/**
* Initializes the current instance.
*/
public void initialize() throws ResourceInitializationException {
-
+
super.initialize();
-
+
mContext = getUimaContext();
-
+
mLogger = mContext.getLogger();
-
+
if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Initializing the OpenNLP SentenceDetector " +
"trainer.");
- }
-
- mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
+ }
+
+ mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.MODEL_PARAMETER);
-
+
language = CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.LANGUAGE_PARAMETER);
-
+
eosChars = CasConsumerUtil.getOptionalStringParameter(mContext, "opennlp.uima.EOSChars");
-
-
+
+
String sampleTraceFileName = CasConsumerUtil.getOptionalStringParameter(
getUimaContext(), "opennlp.uima.SampleTraceFile");
-
+
if (sampleTraceFileName != null) {
sampleTraceFile = new File(getUimaContextAdmin().getResourceManager()
.getDataPath() + File.separatorChar + sampleTraceFileName);
sampleTraceFileEncoding = CasConsumerUtil.getRequiredStringParameter(
getUimaContext(), "opennlp.uima.SampleTraceFileEncoding");
- }
+ }
}
-
+
/**
* Initializes the current instance with the given type system.
*/
public void typeSystemInit(TypeSystem typeSystem)
throws ResourceInitializationException {
-
- String sentenceTypeName =
+
+ String sentenceTypeName =
CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.SENTENCE_TYPE_PARAMETER);
-
+
mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
}
@@ -159,32 +159,32 @@ public final class SentenceDetectorTrain
public void collectionProcessComplete(ProcessTrace trace)
throws ResourceProcessException, IOException {
GIS.PRINT_MESSAGES = false;
-
- char eos[] = null;
+
+ char eos[] = null;
if (eosChars != null) {
eos = eosChars.toCharArray();
}
-
+
SentenceDetectorFactory sdFactory = SentenceDetectorFactory.create(
null, language, true, null, eos);
-
+
// TrainingParameters mlParams = ModelUtil.createTrainingParameters(100, 5);
TrainingParameters mlParams = ModelUtil.createDefaultTrainingParameters();
ObjectStream<SentenceSample> samples = ObjectStreamUtils.createObjectStream(sentenceSamples);
-
+
Writer samplesOut = null;
-
+
if (sampleTraceFile != null) {
samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding);
samples = new SampleTraceStream<SentenceSample>(samples, samplesOut);
}
-
+
SentenceModel sentenceModel = SentenceDetectorME.train(language, samples,
sdFactory, mlParams);
-
+
// dereference to allow garbage collection
sentenceSamples = null;
-
+
File modelFile = new File(getUimaContextAdmin().getResourceManager()
.getDataPath() + File.separatorChar + mModelName);
@@ -197,7 +197,7 @@ public final class SentenceDetectorTrain
public boolean isStateless() {
return false;
}
-
+
/**
* Releases allocated resources.
*/
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResource.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResource.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResource.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResource.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.sentdetect;
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceModelResourceImpl.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.sentdetect;
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/AbstractTokenizer.java Fri May 2 12:34:23 2014
@@ -56,7 +56,7 @@ public abstract class AbstractTokenizer
protected AbstractTokenizer(String name) {
this.name = name;
}
-
+
@Override
public void initialize(UimaContext context)
throws ResourceInitializationException {
@@ -94,7 +94,7 @@ public abstract class AbstractTokenizer
protected void postProcessAnnotations(Span tokens[],
AnnotationFS tokenAnnotations[]) {
}
-
+
protected abstract Span[] tokenize(CAS cas, AnnotationFS sentence);
@Override
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/SimpleTokenizer.java Fri May 2 12:34:23 2014
@@ -35,24 +35,24 @@ import org.apache.uima.cas.text.Annotati
* </table>
*/
public final class SimpleTokenizer extends AbstractTokenizer {
-
+
/**
* The OpenNLP simple tokenizer.
*/
- private opennlp.tools.tokenize.SimpleTokenizer tokenizer =
+ private opennlp.tools.tokenize.SimpleTokenizer tokenizer =
opennlp.tools.tokenize.SimpleTokenizer.INSTANCE;
/**
* Initializes the current instance.
*
- * Note: Use {@link #initialize(UimaContext) } to initialize
+ * Note: Use {@link #initialize(UimaContext) } to initialize
* this instance. Do not use the constructor.
*/
public SimpleTokenizer() {
super("OpenNLP Simple Tokenizer");
// must not be implemented !
}
-
+
@Override
protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
return tokenizer.tokenizePos(sentence.getCoveredText());
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java Fri May 2 12:34:23 2014
@@ -48,36 +48,36 @@ import org.apache.uima.resource.Resource
* <table border=1>
* <caption></caption>
* <tr><th>Type</th> <th>Name</th> <th>Description</th></tr>
- * <tr><td>String</td> <td>opennlp.uima.ProbabilityFeature</td> <td>The name of the double
+ * <tr><td>String</td> <td>opennlp.uima.ProbabilityFeature</td> <td>The name of the double
* probability feature (not set by default)</td>
* </table>
*
* @see TokenizerME
*/
public final class Tokenizer extends AbstractTokenizer {
-
+
/**
* The OpenNLP tokenizer.
*/
private TokenizerME tokenizer;
-
+
private Feature probabilityFeature;
-
+
/**
* Initializes a new instance.
*
- * Note: Use {@link #initialize(UimaContext) } to initialize
+ * Note: Use {@link #initialize(UimaContext) } to initialize
* this instance. Do not use the constructor.
*/
public Tokenizer() {
super("OpenNLP Tokenizer");
-
+
// must not be implemented !
}
-
+
/**
* Initializes the current instance with the given context.
- *
+ *
* Note: Do all initialization in this method, do not use the constructor.
*/
public void initialize(UimaContext context)
@@ -112,12 +112,12 @@ public final class Tokenizer extends Abs
UimaUtil.PROBABILITY_FEATURE_PARAMETER, CAS.TYPE_NAME_DOUBLE);
}
-
+
@Override
protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
return tokenizer.tokenizePos(sentence.getCoveredText());
}
-
+
@Override
protected void postProcessAnnotations(Span[] tokens,
AnnotationFS[] tokenAnnotations) {
@@ -131,12 +131,12 @@ public final class Tokenizer extends Abs
}
}
}
-
+
/**
* Releases allocated resources.
*/
public void destroy() {
- // dereference model to allow garbage collection
+ // dereference model to allow garbage collection
tokenizer = null;
}
}
\ No newline at end of file
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerModelResource.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerModelResource.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerModelResource.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerModelResource.java Fri May 2 12:34:23 2014
@@ -29,7 +29,7 @@ public interface TokenizerModelResource
/**
* Retrieves the shared model instance.
- *
+ *
* @return the shared model instance
*/
TokenizerModel getModel();
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/TokenizerTrainer.java Fri May 2 12:34:23 2014
@@ -79,8 +79,8 @@ import org.apache.uima.util.ProcessTrace
* </table>
*/
public final class TokenizerTrainer extends CasConsumer_ImplBase {
-
- public static final String IS_ALPHA_NUMERIC_OPTIMIZATION =
+
+ public static final String IS_ALPHA_NUMERIC_OPTIMIZATION =
"opennlp.uima.tokenizer.IsAlphaNumericOptimization";
private List<TokenSample> tokenSamples = new ArrayList<TokenSample>();
@@ -106,48 +106,48 @@ public final class TokenizerTrainer exte
private String sampleTraceFileEncoding;
private File sampleTraceFile;
-
+
/**
* Initializes the current instance.
*/
public void initialize() throws ResourceInitializationException {
-
+
super.initialize();
-
+
mContext = getUimaContext();
-
+
mLogger = mContext.getLogger();
-
+
if (mLogger.isLoggable(Level.INFO)) {
mLogger.log(Level.INFO, "Initializing the OpenNLP Tokenizer trainer.");
- }
-
+ }
+
mModelName = CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.MODEL_PARAMETER);
-
+
language = CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.LANGUAGE_PARAMETER);
-
- isSkipAlphaNumerics =
+
+ isSkipAlphaNumerics =
CasConsumerUtil.getOptionalBooleanParameter(
mContext, IS_ALPHA_NUMERIC_OPTIMIZATION);
-
+
if (isSkipAlphaNumerics == null) {
isSkipAlphaNumerics = false;
}
-
+
additionalTrainingDataFile = CasConsumerUtil.getOptionalStringParameter(
getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_FILE);
-
+
// If the additional training data is specified, the encoding must be provided!
if (additionalTrainingDataFile != null) {
additionalTrainingDataEncoding = CasConsumerUtil.getRequiredStringParameter(
getUimaContext(), UimaUtil.ADDITIONAL_TRAINING_DATA_ENCODING);
}
-
+
String sampleTraceFileName = CasConsumerUtil.getOptionalStringParameter(
getUimaContext(), "opennlp.uima.SampleTraceFile");
-
+
if (sampleTraceFileName != null) {
sampleTraceFile = new File(getUimaContextAdmin().getResourceManager()
.getDataPath() + File.separatorChar + sampleTraceFileName);
@@ -164,12 +164,12 @@ public final class TokenizerTrainer exte
String sentenceTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.SENTENCE_TYPE_PARAMETER);
-
+
mSentenceType = CasConsumerUtil.getType(typeSystem, sentenceTypeName);
String tokenTypeName = CasConsumerUtil.getRequiredStringParameter(mContext,
UimaUtil.TOKEN_TYPE_PARAMETER);
-
+
mTokenType = CasConsumerUtil.getType(typeSystem, tokenTypeName);
}
@@ -177,18 +177,18 @@ public final class TokenizerTrainer exte
* Process the given CAS object.
*/
public void processCas(CAS cas) {
-
+
FSIndex<AnnotationFS> sentenceAnnotations = cas.getAnnotationIndex(mSentenceType);
for (AnnotationFS sentence : sentenceAnnotations) {
process(cas, sentence);
}
}
-
+
private void process(CAS tcas, AnnotationFS sentence) {
FSIndex<AnnotationFS> allTokens = tcas.getAnnotationIndex(mTokenType);
- ContainingConstraint containingConstraint =
+ ContainingConstraint containingConstraint =
new ContainingConstraint(sentence);
Iterator<AnnotationFS> containingTokens = tcas.createFilteredIterator(
@@ -205,9 +205,9 @@ public final class TokenizerTrainer exte
}
Span[] spans = openNLPSpans.toArray(new Span[openNLPSpans.size()]);
-
+
Arrays.sort(spans);
-
+
tokenSamples.add(new TokenSample(sentence.getCoveredText(), spans));
}
@@ -217,67 +217,67 @@ public final class TokenizerTrainer exte
*/
public void collectionProcessComplete(ProcessTrace arg0)
throws ResourceProcessException, IOException {
-
+
if (mLogger.isLoggable(Level.INFO)) {
- mLogger.log(Level.INFO, "Collected " + tokenSamples.size() +
+ mLogger.log(Level.INFO, "Collected " + tokenSamples.size() +
" token samples.");
}
-
+
GIS.PRINT_MESSAGES = false;
-
+
ObjectStream<TokenSample> samples = ObjectStreamUtils.createObjectStream(tokenSamples);
-
+
// Write stream to disk ...
// if trace file
// serialize events ...
-
+
InputStream additionalTrainingDataIn = null;
Writer samplesOut = null;
TokenizerModel tokenModel;
-
+
try {
if (additionalTrainingDataFile != null) {
-
+
if (mLogger.isLoggable(Level.INFO)) {
- mLogger.log(Level.INFO, "Using addional training data file: " + additionalTrainingDataFile);
+ mLogger.log(Level.INFO, "Using addional training data file: " + additionalTrainingDataFile);
}
-
+
additionalTrainingDataIn = new FileInputStream(additionalTrainingDataFile);
-
+
ObjectStream<TokenSample> additionalSamples = new TokenSampleStream(
new PlainTextByLineStream(new InputStreamReader(additionalTrainingDataIn, additionalTrainingDataEncoding)));
-
+
samples = ObjectStreamUtils.createObjectStream(samples, additionalSamples);
}
-
+
if (sampleTraceFile != null) {
samplesOut = new OutputStreamWriter(new FileOutputStream(sampleTraceFile), sampleTraceFileEncoding);
samples = new SampleTraceStream<TokenSample>(samples, samplesOut);
}
-
+
tokenModel = TokenizerME.train(language, samples, isSkipAlphaNumerics);
}
finally {
if (additionalTrainingDataIn != null)
additionalTrainingDataIn.close();
}
-
+
// dereference to allow garbage collection
tokenSamples = null;
-
+
File modelFile = new File(getUimaContextAdmin().getResourceManager()
.getDataPath() + File.separatorChar + mModelName);
-
+
OpennlpUtil.serialize(tokenModel, modelFile);
}
-
+
/**
* The trainer is not stateless.
*/
public boolean isStateless() {
return false;
}
-
+
/**
* Releases allocated resources.
*/
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/tokenize/WhitespaceTokenizer.java Fri May 2 12:34:23 2014
@@ -35,18 +35,18 @@ import org.apache.uima.cas.text.Annotati
* </table>
*/
public final class WhitespaceTokenizer extends AbstractTokenizer {
-
+
/**
* Initializes the current instance.
*
- * Note: Use {@link #initialize(UimaContext) } to initialize
+ * Note: Use {@link #initialize(UimaContext) } to initialize
* this instance. Do not use the constructor.
*/
public WhitespaceTokenizer() {
super("OpenNLP Whitespace Tokenizer");
// must not be implemented !
}
-
+
@Override
protected Span[] tokenize(CAS cas, AnnotationFS sentence) {
return opennlp.tools.tokenize.WhitespaceTokenizer.INSTANCE.
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AbstractModelResource.java Fri May 2 12:34:23 2014
@@ -25,11 +25,11 @@ import org.apache.uima.resource.Resource
import org.apache.uima.resource.SharedResourceObject;
public abstract class AbstractModelResource<T> implements SharedResourceObject {
-
+
protected T model;
-
+
protected abstract T loadModel(InputStream in) throws IOException;
-
+
public void load(DataResource resource) throws ResourceInitializationException {
try {
model = loadModel(resource.getInputStream());
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComboIterator.java Fri May 2 12:34:23 2014
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -29,13 +29,13 @@ import org.apache.uima.cas.text.Annotati
/**
* UIMA Annotation iterator combination of super- and subiterator.
- *
+ *
* <p>
* This class supports a common idiom in UIMA annotation iteration, where you need to iterate over
* two kinds of annotations in lock-step. For example, you often want to iterate over all sentences,
* then do something on each sentence and all tokens in that sentence. Here's how to do this with
* this class.
- *
+ *
* <pre>
* CAS cas = ...
* Type sentenceType = ..., tokenType = ...
@@ -46,19 +46,19 @@ import org.apache.uima.cas.text.Annotati
* // Obtain sentence annotation
* AnnotationFS sentence = aiPair.getAnnotation();
* // Do something with sentence...
- *
+ *
* // Iterate over tokens
* for (AnnotationFS token : aiPair.getSubIterator()) {
* // Do something with tokens...
* }
* }
* </pre>
- *
+ *
* The combo iterator returns in its <code>next()</code> method a pair of an annotation of the upper
* type (e.g., sentence), and an iterator over annotations of the lower type (e.g., tokens). Note
* that both the upper and lower iterator also implement the Iterable interface and can be used
* directly in for-loops.
- *
+ *
* <p>
* Note that only this usage is safe. To keep the implementation efficient, the combo iterator keeps
* two iterators internally that it increments in lock-step. Do not attempt, for example, to collect
@@ -146,7 +146,7 @@ public class AnnotationComboIterator imp
/**
* Create a new combo iterator.
- *
+ *
* @param cas
* The CAS we're operating on.
* @param upper
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationComparator.java Fri May 2 12:34:23 2014
@@ -26,13 +26,13 @@ import org.apache.uima.cas.text.Annotati
*/
public class AnnotationComparator implements Comparator<AnnotationFS>
{
-
+
/**
* Compares the begin indexes of the annotations.
- *
+ *
* @param a - first annotation
* @param b - second annotation
- *
+ *
* @return 0 if equals, < 0 if before and > 0 if after
*/
public int compare(AnnotationFS a, AnnotationFS b) {
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationIteratorPair.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationIteratorPair.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationIteratorPair.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotationIteratorPair.java Fri May 2 12:34:23 2014
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/AnnotatorUtil.java Fri May 2 12:34:23 2014
@@ -38,53 +38,53 @@ import org.apache.uima.util.Logger;
* This is a utility class for Annotators.
*/
public final class AnnotatorUtil {
-
+
private AnnotatorUtil(){
// util class must not be instantiated
}
-
+
/**
* Retrieves a type of the given name from the given type system.
- *
+ *
* @param typeSystem
* @param name
* @return the type
- *
+ *
* @throws AnalysisEngineProcessException
*/
public static Type getType(TypeSystem typeSystem, String name)
throws AnalysisEngineProcessException {
Type type = typeSystem.getType(name);
-
+
if (type == null) {
throw new OpenNlpAnnotatorProcessException(
ExceptionMessages.TYPE_NOT_FOUND,
new Object[] {name});
}
-
+
return type;
}
-
+
/**
* Checks if the given feature has the expected type otherwise
* an exception is thrown.
- *
+ *
* @param feature
* @param expectedType
- *
+ *
* @throws AnalysisEngineProcessException - if type does not match
*/
- private static void checkFeatureType(Feature feature, String expectedType)
+ private static void checkFeatureType(Feature feature, String expectedType)
throws AnalysisEngineProcessException {
if (!feature.getRange().getName().equals(expectedType)) {
throw new OpenNlpAnnotatorProcessException(
ExceptionMessages.WRONG_FEATURE_TYPE,
- new Object[] {feature.getName(),
+ new Object[] {feature.getName(),
expectedType
});
}
}
-
+
public static Feature getRequiredFeature(Type type, String featureName)
throws AnalysisEngineProcessException {
@@ -98,15 +98,15 @@ public final class AnnotatorUtil {
return feature;
}
-
+
/**
* Retrieves a required feature from the given type.
- *
+ *
* @param type the type
* @param featureName the name of the feature
* @param rangeType the expected range type
* @return the requested parameter
- *
+ *
* @throws AnalysisEngineProcessException
*/
public static Feature getRequiredFeature(Type type, String featureName,
@@ -119,21 +119,21 @@ public final class AnnotatorUtil {
return feature;
}
- public static Feature getRequiredFeatureParameter(UimaContext context, Type type,
+ public static Feature getRequiredFeatureParameter(UimaContext context, Type type,
String featureNameParameter)
throws AnalysisEngineProcessException {
-
+
String featureName;
-
+
try {
featureName = getRequiredStringParameter(context, featureNameParameter);
} catch (ResourceInitializationException e) {
throw new OpenNlpAnnotatorProcessException(e);
}
-
+
return getRequiredFeature(type, featureName);
}
-
+
public static Feature getRequiredFeatureParameter(UimaContext context,
Type type, String featureNameParameter, String rangeTypeName)
throws AnalysisEngineProcessException {
@@ -147,7 +147,7 @@ public final class AnnotatorUtil {
return getRequiredFeature(type, featureName, rangeTypeName);
}
-
+
public static Type getRequiredTypeParameter(UimaContext context,
TypeSystem typeSystem, String parameter)
throws AnalysisEngineProcessException {
@@ -162,88 +162,88 @@ public final class AnnotatorUtil {
return getType(typeSystem, typeName);
}
-
+
/**
* Retrieves a required parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the requested parameter
- *
- * @throws ResourceInitializationException
+ *
+ * @throws ResourceInitializationException
*/
public static String getRequiredStringParameter(UimaContext context,
- String parameter)
+ String parameter)
throws ResourceInitializationException {
-
+
String value = getOptionalStringParameter(context, parameter);
-
+
checkForNull(value, parameter);
-
+
return value;
}
/**
* Retrieves a required parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the requested parameter
- *
- * @throws ResourceInitializationException
+ *
+ * @throws ResourceInitializationException
*/
public static Integer getRequiredIntegerParameter(UimaContext context,
- String parameter)
+ String parameter)
throws ResourceInitializationException {
-
+
Integer value = getOptionalIntegerParameter(context, parameter);
-
+
checkForNull(value, parameter);
-
+
return value;
- }
-
+ }
+
/**
* Retrieves a required parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the requested parameter
- *
- * @throws ResourceInitializationException
+ *
+ * @throws ResourceInitializationException
*/
public static Float getRequiredFloatParameter(UimaContext context,
- String parameter)
+ String parameter)
throws ResourceInitializationException {
-
+
Float value = getOptionalFloatParameter(context, parameter);
-
+
checkForNull(value, parameter);
-
+
return value;
}
-
+
/**
* Retrieves a required parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the requested parameter
- *
- * @throws ResourceInitializationException
+ *
+ * @throws ResourceInitializationException
*/
public static Boolean getRequiredBooleanParameter(UimaContext context,
- String parameter)
+ String parameter)
throws ResourceInitializationException {
-
+
Boolean value = getOptionalBooleanParameter(context, parameter);
-
+
checkForNull(value, parameter);
-
+
return value;
}
-
- private static void checkForNull(Object value, String parameterName)
+
+ private static void checkForNull(Object value, String parameterName)
throws ResourceInitializationException {
if (value == null) {
throw new ResourceInitializationException(
@@ -252,8 +252,8 @@ public final class AnnotatorUtil {
new Object[] {parameterName});
}
}
-
-
+
+
public static Feature getOptionalFeatureParameter(UimaContext context,
Type nameType, String featureNameParameter, String rangeTypeName)
throws AnalysisEngineProcessException {
@@ -271,17 +271,17 @@ public final class AnnotatorUtil {
return null;
}
}
-
- public static Feature getOptionalFeature(Type type, String featureName, String rangeType)
+
+ public static Feature getOptionalFeature(Type type, String featureName, String rangeType)
throws AnalysisEngineProcessException{
Feature feature = type.getFeatureByBaseName(featureName);
-
+
checkFeatureType(feature, rangeType);
-
+
return feature;
}
-
+
public static Type getOptionalTypeParameter(UimaContext context,
TypeSystem typeSystem, String parameter)
throws AnalysisEngineProcessException {
@@ -301,18 +301,18 @@ public final class AnnotatorUtil {
/**
* Retrieves an optional parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the parameter or null if not set
- *
- * @throws ResourceInitializationException
+ *
+ * @throws ResourceInitializationException
*/
public static String getOptionalStringParameter(UimaContext context,
- String parameter)
+ String parameter)
throws ResourceInitializationException {
Object value = getOptionalParameter(context, parameter);
-
+
if (value instanceof String) {
return (String) value;
}
@@ -342,21 +342,21 @@ public final class AnnotatorUtil {
"String array" });
}
}
-
+
/**
* Retrieves an optional parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the parameter or null if not set
- *
+ *
* @throws ResourceInitializationException
*/
public static Integer getOptionalIntegerParameter(UimaContext context,
- String parameter)
+ String parameter)
throws ResourceInitializationException {
Object value = getOptionalParameter(context, parameter);
-
+
if (value instanceof Integer) {
return (Integer) value;
}
@@ -373,19 +373,19 @@ public final class AnnotatorUtil {
/**
* Retrieves an optional parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the parameter or null if not set
- *
- * @throws ResourceInitializationException
+ *
+ * @throws ResourceInitializationException
*/
public static Float getOptionalFloatParameter(UimaContext context,
- String parameter)
+ String parameter)
throws ResourceInitializationException {
Object value = getOptionalParameter(context, parameter);
-
+
if (value instanceof Float) {
return (Float) value;
}
@@ -399,21 +399,21 @@ public final class AnnotatorUtil {
new Object[] {parameter, "Float"});
}
}
-
+
/**
* Retrieves an optional parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the parameter or null if not set
- *
- * @throws ResourceInitializationException
+ *
+ * @throws ResourceInitializationException
*/
public static Boolean getOptionalBooleanParameter(UimaContext context,
- String parameter)
+ String parameter)
throws ResourceInitializationException {
Object value = getOptionalParameter(context, parameter);
-
+
if (value instanceof Boolean) {
return (Boolean) value;
}
@@ -428,29 +428,29 @@ public final class AnnotatorUtil {
}
}
- private static Object getOptionalParameter(UimaContext context,
- String parameter)
+ private static Object getOptionalParameter(UimaContext context,
+ String parameter)
throws ResourceInitializationException {
-
+
Object value = context.getConfigParameterValue(parameter);
-
+
Logger logger = context.getLogger();
-
+
if (logger.isLoggable(Level.INFO)) {
- logger.log(Level.INFO, parameter + " = " +
+ logger.log(Level.INFO, parameter + " = " +
(value != null ? value.toString() : "not set"));
}
-
+
return value;
}
-
+
/**
* Retrieves a resource as stream from the given context.
- *
+ *
* @param context
* @param name
* @return the stream
- *
+ *
* @throws ResourceInitializationException
*/
public static InputStream getResourceAsStream(UimaContext context, String name)
@@ -480,7 +480,7 @@ public final class AnnotatorUtil {
return inResource;
}
-
+
public static Dictionary createOptionalDictionary(UimaContext context,
String dictionaryParameter) throws ResourceInitializationException {
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/CasConsumerUtil.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/CasConsumerUtil.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/CasConsumerUtil.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/CasConsumerUtil.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.util;
@@ -36,38 +36,38 @@ import org.apache.uima.util.Logger;
* This is a util class for cas consumer.
*/
public final class CasConsumerUtil {
-
+
private CasConsumerUtil(){
// this is a util class and must not be instantiated
}
-
- public static InputStream getOptionalResourceAsStream(UimaContext context,
+
+ public static InputStream getOptionalResourceAsStream(UimaContext context,
String name) throws ResourceInitializationException {
try {
return context.getResourceAsStream(name);
} catch (ResourceAccessException e) {
throw new ResourceInitializationException(
ResourceInitializationException.STANDARD_MESSAGE_CATALOG,
- new Object[] { "There is an internal error in the UIMA SDK: " +
+ new Object[] { "There is an internal error in the UIMA SDK: " +
e.getMessage(),
e });
- }
+ }
}
-
+
/**
* Retrieves a resource as stream from the given context.
- *
+ *
* @param context
* @param name
* @return the stream
- *
+ *
* @throws ResourceInitializationException
*/
- public static InputStream getResourceAsStream(UimaContext context,
+ public static InputStream getResourceAsStream(UimaContext context,
String name) throws ResourceInitializationException {
-
- InputStream inResource = getOptionalResourceAsStream(context, name);
-
+
+ InputStream inResource = getOptionalResourceAsStream(context, name);
+
if (inResource == null) {
throw new ResourceInitializationException(
ResourceAccessException.STANDARD_MESSAGE_CATALOG,
@@ -76,36 +76,36 @@ public final class CasConsumerUtil {
return inResource;
}
-
+
/**
* Retrieves a type from the given type system.
- *
+ *
* @param typeSystem
* @param name
* @return the type
- *
+ *
* @throws ResourceInitializationException
*/
public static Type getType(TypeSystem typeSystem, String name)
throws ResourceInitializationException {
Type type = getOptionalType(typeSystem, name);
-
+
if (type == null) {
throw new ResourceInitializationException(
ResourceInitializationException.INCOMPATIBLE_RANGE_TYPES,
new Object[] { "Unable to retrieve " + name + " type!" });
}
-
+
return type;
}
-
+
/**
* Retrieves a type from the given type system.
- *
+ *
* @param typeSystem
* @param name
* @return the type
- *
+ *
* @throws ResourceInitializationException
*/
public static Type getOptionalType(TypeSystem typeSystem, String name)
@@ -114,104 +114,104 @@ public final class CasConsumerUtil {
}
/**
* Retrieves a required parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the parameter
- *
+ *
* @throws ResourceInitializationException
*/
public static String getRequiredStringParameter(UimaContext context,
String parameter) throws ResourceInitializationException {
String value = getOptionalStringParameter(context, parameter);
-
+
checkForNull(value, parameter);
-
+
return value;
}
/**
* Retrieves a required parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the parameter
- *
+ *
* @throws ResourceInitializationException
*/
public static Integer getRequiredIntegerParameter(UimaContext context,
String parameter) throws ResourceInitializationException {
Integer value = getOptionalIntegerParameter(context, parameter);
-
+
checkForNull(value, parameter);
return value;
}
-
+
/**
* Retrieves a required parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the parameter
- *
+ *
* @throws ResourceInitializationException
*/
public static Float getRequiredFloatParameter(UimaContext context,
String parameter) throws ResourceInitializationException {
Float value = getOptionalFloatParameter(context, parameter);
-
+
checkForNull(value, parameter);
return value;
}
-
+
/**
* Retrieves a required boolean parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the boolean parameter
- *
+ *
* @throws ResourceInitializationException
*/
- public static Boolean getRequiredBooleanParameter(UimaContext context,
+ public static Boolean getRequiredBooleanParameter(UimaContext context,
String parameter) throws ResourceInitializationException {
-
+
Boolean value = getOptionalBooleanParameter(context, parameter);
-
+
checkForNull(value, parameter);
return value;
}
- private static void checkForNull(Object value, String parameterName)
+ private static void checkForNull(Object value, String parameterName)
throws ResourceInitializationException{
-
+
if (value == null) {
throw new ResourceInitializationException(
ResourceInitializationException.STANDARD_MESSAGE_CATALOG,
- new Object[] { "The " + parameterName + " is a " +
+ new Object[] { "The " + parameterName + " is a " +
"required parameter!" });
}
}
-
+
/**
* Retrieves an optional string parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the string parameter or null if not set
- * @throws ResourceInitializationException
+ * @throws ResourceInitializationException
*/
public static String getOptionalStringParameter(UimaContext context,
String parameter) throws ResourceInitializationException {
-
+
Object value = getOptionalParameter(context, parameter);
-
+
if (value == null) {
return null;
}
@@ -225,7 +225,7 @@ public final class CasConsumerUtil {
" the expected type String"});
}
}
-
+
public static String[] getOptionalStringArrayParameter(UimaContext context,
String parameter) throws ResourceInitializationException {
@@ -242,10 +242,10 @@ public final class CasConsumerUtil {
+ " does not have the expected type String array" });
}
}
-
+
/**
* Retrieves an optional integer parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the integer parameter or null if not set
@@ -253,9 +253,9 @@ public final class CasConsumerUtil {
*/
public static Integer getOptionalIntegerParameter(UimaContext context,
String parameter) throws ResourceInitializationException {
-
+
Object value = getOptionalParameter(context, parameter);
-
+
if (value == null) {
return null;
}
@@ -269,41 +269,41 @@ public final class CasConsumerUtil {
"the expected type Integer"});
}
}
-
+
/**
* Retrieves an optional integer parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @param defaultValue value to use if the optional parameter is not set
- *
+ *
* @return the integer parameter or the default value if not set
* @throws ResourceInitializationException
*/
public static Integer getOptionalIntegerParameter(UimaContext context, String parameter,
int defaultValue) throws ResourceInitializationException {
-
+
Integer value = getOptionalIntegerParameter(context, parameter);
-
+
if (value == null)
value = defaultValue;
-
+
return value;
}
-
+
/**
* Retrieves an optional float parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the float parameter or null if not set
- * @throws ResourceInitializationException
+ * @throws ResourceInitializationException
*/
public static Float getOptionalFloatParameter(UimaContext context,
String parameter) throws ResourceInitializationException {
-
+
Object value = getOptionalParameter(context, parameter);
-
+
if (value == null) {
return null;
}
@@ -317,20 +317,20 @@ public final class CasConsumerUtil {
" the expected type Float"});
}
}
-
+
/**
* Retrieves an optional boolean parameter from the given context.
- *
+ *
* @param context
* @param parameter
* @return the boolean parameter or null if not set
- * @throws ResourceInitializationException
+ * @throws ResourceInitializationException
*/
public static Boolean getOptionalBooleanParameter(UimaContext context,
String parameter) throws ResourceInitializationException {
-
+
Object value = getOptionalParameter(context, parameter);
-
+
if (value == null) {
return null;
}
@@ -344,47 +344,47 @@ public final class CasConsumerUtil {
" the expected type Boolean"});
}
}
-
- private static Object getOptionalParameter(UimaContext context,
+
+ private static Object getOptionalParameter(UimaContext context,
String parameter) {
-
+
Object value = context.getConfigParameterValue(parameter);
Logger logger = context.getLogger();
-
+
if (logger.isLoggable(Level.INFO)) {
- logger.log(Level.INFO, parameter + " = " +
+ logger.log(Level.INFO, parameter + " = " +
(value != null ? value.toString() : "not set"));
}
-
+
return value;
}
-
+
/**
* Checks if the given feature has the expected type otherwise
* an exception is thrown.
- *
+ *
* @param feature
* @param expectedType
- *
+ *
* @throws ResourceInitializationException - if type does not match
*/
- public static void checkFeatureType(Feature feature, String expectedType)
+ public static void checkFeatureType(Feature feature, String expectedType)
throws ResourceInitializationException {
if (!feature.getRange().getName().equals(expectedType)) {
throw new ResourceInitializationException(
ResourceInitializationException.STANDARD_MESSAGE_CATALOG,
- new Object[] { "The Feature " + feature.getName() +
+ new Object[] { "The Feature " + feature.getName() +
" must be of type " + expectedType + " !"
});
}
}
-
- public static Dictionary createOptionalDictionary(UimaContext context, String parameter)
+
+ public static Dictionary createOptionalDictionary(UimaContext context, String parameter)
throws ResourceInitializationException {
String dictionaryName = CasConsumerUtil.getOptionalStringParameter(
context, parameter);
-
+
Dictionary dictionary = null;
if (dictionaryName != null) {
@@ -397,23 +397,23 @@ public final class CasConsumerUtil {
dictionaryName);
if (dictIn == null) {
- String message = "The dictionary file " + dictionaryName +
+ String message = "The dictionary file " + dictionaryName +
" does not exist!";
if (logger.isLoggable(Level.WARNING)) {
logger.log(Level.WARNING, message);
}
-
+
return null;
}
-
+
dictionary = new Dictionary(dictIn);
} catch (IOException e) {
// if this fails just print error message and continue
String message = "IOException during dictionary reading, "
+ "running without dictionary: " + e.getMessage();
-
+
if (logger.isLoggable(Level.WARNING)) {
logger.log(Level.WARNING, message);
}
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/ContainingConstraint.java Fri May 2 12:34:23 2014
@@ -13,7 +13,7 @@
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
- */
+ */
package opennlp.uima.util;
@@ -30,7 +30,7 @@ import org.apache.uima.cas.text.Annotati
public final class ContainingConstraint implements FSMatchConstraint {
private static final long serialVersionUID = 1;
- private Collection<AnnotationFS> mContainingAnnotations =
+ private Collection<AnnotationFS> mContainingAnnotations =
new LinkedList<AnnotationFS>();
/**
@@ -42,13 +42,13 @@ public final class ContainingConstraint
/**
* Initializes a new instance.
- *
- * @param containingAnnotation
+ *
+ * @param containingAnnotation
*/
public ContainingConstraint(AnnotationFS containingAnnotation) {
mContainingAnnotations.add(containingAnnotation);
}
-
+
/**
* Checks if the given FeatureStructure match the constraint.
*/
Modified: opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/ExceptionMessages.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/ExceptionMessages.java?rev=1591889&r1=1591888&r2=1591889&view=diff
==============================================================================
--- opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/ExceptionMessages.java (original)
+++ opennlp/trunk/opennlp-uima/src/main/java/opennlp/uima/util/ExceptionMessages.java Fri May 2 12:34:23 2014
@@ -22,7 +22,7 @@ package opennlp.uima.util;
* message catalog.
*/
public class ExceptionMessages {
-
+
public static final String MESSAGE_CATALOG = "opennlp.uima.util.ExceptionMessages";
public static final String IO_ERROR_MODEL_READING = "io_error_model_reading";