You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2012/08/06 14:54:16 UTC
svn commit: r1369824 - in /opennlp/sandbox/corpus-server-connector/desc:
EngTokenizerAndSentdetectCPE.xml SentenceDetector.xml Tokenizer.xml
TokenizerAndSentdetectAAE.xml
Author: joern
Date: Mon Aug 6 12:54:15 2012
New Revision: 1369824
URL: http://svn.apache.org/viewvc?rev=1369824&view=rev
Log:
OPENNLP-261 Added sample to perform sentence detection and tokenization via a CPE.
Added:
opennlp/sandbox/corpus-server-connector/desc/EngTokenizerAndSentdetectCPE.xml (with props)
opennlp/sandbox/corpus-server-connector/desc/SentenceDetector.xml (with props)
opennlp/sandbox/corpus-server-connector/desc/Tokenizer.xml (with props)
opennlp/sandbox/corpus-server-connector/desc/TokenizerAndSentdetectAAE.xml (with props)
Added: opennlp/sandbox/corpus-server-connector/desc/EngTokenizerAndSentdetectCPE.xml
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server-connector/desc/EngTokenizerAndSentdetectCPE.xml?rev=1369824&view=auto
==============================================================================
--- opennlp/sandbox/corpus-server-connector/desc/EngTokenizerAndSentdetectCPE.xml (added)
+++ opennlp/sandbox/corpus-server-connector/desc/EngTokenizerAndSentdetectCPE.xml Mon Aug 6 12:54:15 2012
@@ -0,0 +1,132 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<cpeDescription> <!-- CPE descriptor: one collection reader followed by two CAS processors (an analysis aggregate, then a writer). -->
+ <collectionReader> <!-- Source of documents: the reader descriptor imported below plus its parameter values. -->
+ <collectionIterator>
+ <descriptor>
+ <import location="CSQueueCollectionReader.xml" />
+ </descriptor>
+ <configurationParameterSettings> <!-- Reader settings: server at localhost:8080, corpus enwikinews, queue enwikinewsnlpqueue. -->
+ <nameValuePair>
+ <name>ServerAddress</name>
+ <value>
+ <string>http://localhost:8080/rest</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>CorpusName</name>
+ <value>
+ <string>enwikinews</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair> <!-- NOTE(review): "*:*" looks like a match-all query; confirm against the corpus server's query syntax. -->
+ <name>SearchQuery</name>
+ <value>
+ <string>*:*</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>QueueName</name>
+ <value>
+ <string>enwikinewsnlpqueue</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair> <!-- IdFSTypeName and IdFeatureName name a type and a feature; presumably where the document id is kept in the CAS (TODO confirm). -->
+ <name>IdFSTypeName</name>
+ <value>
+ <string>org.apache.opennlp.annotations.ArticleId</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>IdFeatureName</name>
+ <value>
+ <string>id</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+ </collectionIterator>
+ </collectionReader>
+ <casProcessors casPoolSize="2" processingUnitThreadCount="1"> <!-- Processors are listed in pipeline order: the analysis aggregate first, then CSWriter. -->
+ <casProcessor deployment="integrated" name="TokenizerAndSentdetectAAE"> <!-- Step 1: the aggregate imported from TokenizerAndSentdetectAAE.xml. -->
+ <descriptor>
+ <import location="TokenizerAndSentdetectAAE.xml" />
+ </descriptor>
+ <deploymentParameters />
+ <filter></filter>
+ <errorHandling> <!-- Both thresholds use action="terminate". NOTE(review): "100/1000" presumably means 100 errors per 1000 documents; confirm in the UIMA CPE reference. -->
+ <errorRateThreshold action="terminate" value="100/1000" />
+ <maxConsecutiveRestarts action="terminate"
+ value="30" />
+ <timeout max="100000" />
+ </errorHandling>
+ <checkpoint batch="10000" />
+ </casProcessor>
+ <casProcessor deployment="integrated" name="CSWriter"> <!-- Step 2: the writer imported from CSCasWriter.xml, given the same server address, corpus and id type/feature as the reader. -->
+ <descriptor>
+ <import location="CSCasWriter.xml" />
+ </descriptor>
+ <configurationParameterSettings>
+ <nameValuePair>
+ <name>ServerAddress</name>
+ <value>
+ <string>http://localhost:8080/rest</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>CorpusName</name>
+ <value>
+ <string>enwikinews</string>
+ </value>
+ </nameValuePair>
+
+ <nameValuePair>
+ <name>IdFSTypeName</name>
+ <value>
+ <string>org.apache.opennlp.annotations.ArticleId</string>
+ </value>
+ </nameValuePair>
+
+ <nameValuePair>
+ <name>IdFeatureName</name>
+ <value>
+ <string>id</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+ <deploymentParameters />
+ <filter></filter>
+ <errorHandling> <!-- Same error-handling values as the TokenizerAndSentdetectAAE processor. -->
+ <errorRateThreshold action="terminate" value="100/1000" />
+ <maxConsecutiveRestarts action="terminate"
+ value="30" />
+ <timeout max="100000" />
+ </errorHandling>
+ <checkpoint batch="10000" />
+ </casProcessor>
+ </casProcessors>
+ <cpeConfig> <!-- Run-level settings. NOTE(review): numToProcess of -1 presumably means "no limit"; confirm in the UIMA CPE reference. -->
+ <numToProcess>-1</numToProcess>
+ <deployAs>immediate</deployAs>
+ <checkpoint file="" time="300000" /> <!-- The checkpoint file name is an empty string. -->
+ <timerImpl></timerImpl>
+ </cpeConfig>
+</cpeDescription>
Propchange: opennlp/sandbox/corpus-server-connector/desc/EngTokenizerAndSentdetectCPE.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: opennlp/sandbox/corpus-server-connector/desc/SentenceDetector.xml
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server-connector/desc/SentenceDetector.xml?rev=1369824&view=auto
==============================================================================
--- opennlp/sandbox/corpus-server-connector/desc/SentenceDetector.xml (added)
+++ opennlp/sandbox/corpus-server-connector/desc/SentenceDetector.xml Mon Aug 6 12:54:15 2012
@@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier"> <!-- Primitive analysis engine descriptor for the annotator class named below. -->
+ <frameworkImplementation>org.apache.uima.java
+ </frameworkImplementation>
+ <primitive>true</primitive>
+ <annotatorImplementationName>opennlp.uima.sentdetect.SentenceDetector</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>Sentence Detector</name>
+ <description></description>
+ <version>${pom.version}</version>
+ <vendor>Apache Software Foundation</vendor>
+ <configurationParameters> <!-- Declares two String parameters: SentenceType (mandatory) and ContainerType (optional). -->
+ <configurationParameter>
+ <name>opennlp.uima.SentenceType</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>opennlp.uima.ContainerType</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+
+ <configurationParameterSettings> <!-- Only SentenceType is given a value here; ContainerType is left unset. -->
+ <nameValuePair>
+ <name>opennlp.uima.SentenceType</name>
+ <value>
+ <string>org.apache.opennlp.annotations.Sentence</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+
+ <typeSystemDescription> <!-- Types come from the imported TypeSystem.xml rather than inline definitions. -->
+ <imports>
+ <import location="TypeSystem.xml" />
+ </imports>
+ </typeSystemDescription>
+
+ <capabilities> <!-- Declares language "en"; inputs and outputs are left empty. -->
+ <capability>
+ <inputs />
+ <outputs />
+ <languagesSupported>
+ <language>en</language>
+ </languagesSupported>
+ </capability>
+ </capabilities>
+ </analysisEngineMetaData>
+
+ <externalResourceDependencies> <!-- Declares a dependency under key opennlp.uima.ModelName; no binding is defined in this descriptor (TokenizerAndSentdetectAAE.xml binds it to SentenceModel). -->
+ <externalResourceDependency>
+ <key>opennlp.uima.ModelName</key>
+ <interfaceName>opennlp.uima.sentdetect.SentenceModelResource</interfaceName>
+ </externalResourceDependency>
+ </externalResourceDependencies>
+</analysisEngineDescription>
Propchange: opennlp/sandbox/corpus-server-connector/desc/SentenceDetector.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: opennlp/sandbox/corpus-server-connector/desc/Tokenizer.xml
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server-connector/desc/Tokenizer.xml?rev=1369824&view=auto
==============================================================================
--- opennlp/sandbox/corpus-server-connector/desc/Tokenizer.xml (added)
+++ opennlp/sandbox/corpus-server-connector/desc/Tokenizer.xml Mon Aug 6 12:54:15 2012
@@ -0,0 +1,95 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier"> <!-- Primitive analysis engine descriptor for the annotator class named below. -->
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive>
+ <annotatorImplementationName>opennlp.uima.tokenize.Tokenizer</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>Tokenizer</name>
+ <description></description>
+ <version>${pom.version}</version>
+ <vendor>Apache Software Foundation</vendor>
+ <configurationParameters> <!-- Declares three String parameters: SentenceType and TokenType are mandatory, IsAlphaNumericOptimization is optional. -->
+ <configurationParameter>
+ <name>opennlp.uima.SentenceType</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+
+ <configurationParameter>
+ <name>opennlp.uima.TokenType</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+
+ <configurationParameter> <!-- NOTE(review): declared as String although the name suggests a boolean flag; confirm the expected values. -->
+ <name>opennlp.uima.tokenizer.IsAlphaNumericOptimization</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings> <!-- Sets the two mandatory parameters; IsAlphaNumericOptimization gets no value here. -->
+ <nameValuePair>
+ <name>opennlp.uima.TokenType</name>
+ <value>
+ <string>org.apache.opennlp.annotations.Token</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>opennlp.uima.SentenceType</name>
+ <value>
+ <string>org.apache.opennlp.annotations.Sentence</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+
+ <typeSystemDescription> <!-- Types come from the imported TypeSystem.xml rather than inline definitions. -->
+ <imports>
+ <import location="TypeSystem.xml" />
+ </imports>
+ </typeSystemDescription>
+
+ <capabilities> <!-- Declares language "en"; inputs and outputs are left empty. -->
+ <capability>
+ <inputs />
+ <outputs />
+ <languagesSupported>
+ <language>en</language>
+ </languagesSupported>
+ </capability>
+ </capabilities>
+ <operationalProperties> <!-- Marks the engine as CAS-modifying and allows multiple deployment. -->
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ </operationalProperties>
+ </analysisEngineMetaData>
+
+ <externalResourceDependencies> <!-- Declares a dependency under key opennlp.uima.ModelName; no binding is defined in this descriptor (TokenizerAndSentdetectAAE.xml binds it to TokenModel). -->
+ <externalResourceDependency>
+ <key>opennlp.uima.ModelName</key>
+ <interfaceName>opennlp.uima.tokenize.TokenizerModelResource</interfaceName>
+ </externalResourceDependency>
+ </externalResourceDependencies>
+</analysisEngineDescription>
Propchange: opennlp/sandbox/corpus-server-connector/desc/Tokenizer.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: opennlp/sandbox/corpus-server-connector/desc/TokenizerAndSentdetectAAE.xml
URL: http://svn.apache.org/viewvc/opennlp/sandbox/corpus-server-connector/desc/TokenizerAndSentdetectAAE.xml?rev=1369824&view=auto
==============================================================================
--- opennlp/sandbox/corpus-server-connector/desc/TokenizerAndSentdetectAAE.xml (added)
+++ opennlp/sandbox/corpus-server-connector/desc/TokenizerAndSentdetectAAE.xml Mon Aug 6 12:54:15 2012
@@ -0,0 +1,97 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier"> <!-- Aggregate analysis engine (primitive=false) combining the SentenceDetector and Tokenizer descriptors. -->
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>false</primitive>
+
+ <delegateAnalysisEngineSpecifiers> <!-- Each delegate is imported by location and addressed elsewhere in this file by its key. -->
+ <delegateAnalysisEngine key="SentenceDetector">
+ <import location="SentenceDetector.xml" />
+ </delegateAnalysisEngine>
+
+ <delegateAnalysisEngine key="Tokenizer">
+ <import location="Tokenizer.xml" />
+ </delegateAnalysisEngine>
+ </delegateAnalysisEngineSpecifiers>
+
+ <analysisEngineMetaData>
+ <name>TokenizerAndSentdetectAAE</name>
+ <description />
+ <version>${pom.version}</version>
+ <vendor>Apache Software Foundation</vendor>
+ <configurationParameters />
+ <configurationParameterSettings />
+ <flowConstraints> <!-- Fixed flow: SentenceDetector is listed before Tokenizer. -->
+ <fixedFlow>
+ <node>SentenceDetector</node>
+ <node>Tokenizer</node>
+ </fixedFlow>
+ </flowConstraints>
+ <capabilities> <!-- Declares language "en"; inputs and outputs are left empty. -->
+ <capability>
+ <inputs />
+ <outputs />
+ <languagesSupported>
+ <language>en</language>
+ </languagesSupported>
+ </capability>
+ </capabilities>
+ <operationalProperties> <!-- Unlike Tokenizer.xml, multiple deployment is disallowed for the aggregate. -->
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+
+ <resourceManagerConfiguration>
+
+ <externalResources> <!-- Defines the two model resources. NOTE(review): file:en-sent.bin and file:en-token.bin are relative file URLs; confirm how they are resolved at runtime. -->
+ <externalResource>
+ <name>SentenceModel</name>
+ <fileResourceSpecifier>
+ <fileUrl>file:en-sent.bin</fileUrl>
+ </fileResourceSpecifier>
+ <implementationName>opennlp.uima.sentdetect.SentenceModelResourceImpl</implementationName>
+ </externalResource>
+
+ <externalResource>
+ <name>TokenModel</name>
+ <fileResourceSpecifier>
+ <fileUrl>file:en-token.bin</fileUrl>
+ </fileResourceSpecifier>
+ <implementationName>opennlp.uima.tokenize.TokenizerModelResourceImpl</implementationName>
+ </externalResource>
+ </externalResources>
+
+ <externalResourceBindings> <!-- Each binding key is the delegate key, a slash, then the dependency key declared in that delegate's descriptor. -->
+ <externalResourceBinding>
+ <key>SentenceDetector/opennlp.uima.ModelName</key>
+ <resourceName>SentenceModel</resourceName>
+ </externalResourceBinding>
+ <externalResourceBinding>
+ <key>Tokenizer/opennlp.uima.ModelName</key>
+ <resourceName>TokenModel</resourceName>
+ </externalResourceBinding>
+ </externalResourceBindings>
+
+ </resourceManagerConfiguration>
+</analysisEngineDescription>
\ No newline at end of file
Propchange: opennlp/sandbox/corpus-server-connector/desc/TokenizerAndSentdetectAAE.xml
------------------------------------------------------------------------------
svn:mime-type = text/plain