You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2009/09/10 17:06:37 UTC
svn commit: r813470 - in /incubator/uima/uimaj/trunk:
uimaj-examples/src/main/java/org/apache/uima/examples/RunAE.java
uimaj-tools/src/main/java/org/apache/uima/tools/RunAE.java
Author: schor
Date: Thu Sep 10 15:06:37 2009
New Revision: 813470
URL: http://svn.apache.org/viewvc?rev=813470&view=rev
Log:
[UIMA-1015] Change call to avoid use of a deprecated method. Also, make RunAE in uimaj-tools and RunAE in uimaj-examples match - mostly by copying some additional capabilities from the tools version to the examples version.
Modified:
incubator/uima/uimaj/trunk/uimaj-examples/src/main/java/org/apache/uima/examples/RunAE.java
incubator/uima/uimaj/trunk/uimaj-tools/src/main/java/org/apache/uima/tools/RunAE.java
Modified: incubator/uima/uimaj/trunk/uimaj-examples/src/main/java/org/apache/uima/examples/RunAE.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-examples/src/main/java/org/apache/uima/examples/RunAE.java?rev=813470&r1=813469&r2=813470&view=diff
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-examples/src/main/java/org/apache/uima/examples/RunAE.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-examples/src/main/java/org/apache/uima/examples/RunAE.java Thu Sep 10 15:06:37 2009
@@ -36,6 +36,7 @@
import org.apache.uima.collection.metadata.CasProcessorConfigurationParameterSettings;
import org.apache.uima.collection.metadata.CpeCasProcessor;
import org.apache.uima.collection.metadata.CpeCollectionReader;
+import org.apache.uima.collection.metadata.CpeComponentDescriptor;
import org.apache.uima.collection.metadata.CpeDescription;
import org.apache.uima.collection.metadata.CpeSofaMapping;
import org.apache.uima.collection.metadata.CpeSofaMappings;
@@ -64,7 +65,7 @@
* documents, that contains the text to be analyzed. The text will also be detagged. If this option
* is not specified, the entire document will be processed. <br>
* -l <ISO code> (Language) - specifies the ISO code for the language of the input documents.
- * Some AEs require this. <br>
+ * Some AEs (e.g. PersonTitleAnnotator) require this. <br>
* -e <Encoding> - specifies character encoding of the input documents. The default is UTF-8.
* <br>
* -q (Quiet) - suppresses progress messages that are normally printed as each document is processed.
@@ -94,6 +95,10 @@
private boolean xcasInput = false;
+ private boolean xmiInput = false;
+
+ private boolean xLenient = false;
+
int docsProcessed;
private CollectionProcessingEngine mCPE;
@@ -126,14 +131,22 @@
.getAbsolutePath());
crSettings.setParameterValue(FileSystemCollectionReader.PARAM_ENCODING, encoding);
crSettings.setParameterValue(FileSystemCollectionReader.PARAM_LANGUAGE, language);
- crSettings.setParameterValue(FileSystemCollectionReader.PARAM_XCAS, Boolean
- .toString(xcasInput));
+ if (xcasInput) {
+ crSettings.setParameterValue(FileSystemCollectionReader.PARAM_XCAS, "XCAS");
+ } else if (xmiInput) {
+ crSettings.setParameterValue(FileSystemCollectionReader.PARAM_XCAS, "XMI");
+ }
+ if (xLenient) {
+ crSettings.setParameterValue(FileSystemCollectionReader.PARAM_LENIENT, "true");
+ }
// if XML tag was specified, configure XmlDetagger annotator and add to CPE
CpeCasProcessor xmlDetaggerCasProc = null;
if (xmlTagName != null && xmlTagName.length() > 0) {
xmlDetaggerCasProc = CpeDescriptorFactory.produceCasProcessor("XmlDetagger");
- xmlDetaggerCasProc.setDescriptor(XmlDetagger.getDescriptorURL().toString());
+ CpeComponentDescriptor cpeComponentDescriptor =
+ CpeDescriptorFactory.produceComponentDescriptor(XmlDetagger.getDescriptorURL().toString());
+ xmlDetaggerCasProc.setCpeComponentDescriptor(cpeComponentDescriptor);
CasProcessorConfigurationParameterSettings detaggerSettings = CpeDescriptorFactory
.produceCasProcessorConfigurationParameterSettings();
xmlDetaggerCasProc.setConfigurationParameterSettings(detaggerSettings);
@@ -144,7 +157,9 @@
// add user's AE to CPE
CpeCasProcessor casProc = CpeDescriptorFactory.produceCasProcessor("UserAE");
- casProc.setDescriptor(aeSpecifierFile.getAbsolutePath());
+ CpeComponentDescriptor cpeComponentDescriptor =
+ CpeDescriptorFactory.produceComponentDescriptor(aeSpecifierFile.getAbsolutePath());
+ casProc.setCpeComponentDescriptor(cpeComponentDescriptor);
casProc.setMaxErrorCount(0);
cpeDesc.addCasProcessor(casProc);
@@ -153,14 +168,19 @@
CpeCasProcessor casCon = null;
if (outputDir != null) {
casCon = CpeDescriptorFactory.produceCasProcessor("CasConsumer");
- casCon.setDescriptor(InlineXmlCasConsumer.getDescriptorURL().toString());
+ cpeComponentDescriptor =
+ CpeDescriptorFactory.produceComponentDescriptor(InlineXmlCasConsumer.getDescriptorURL().toString());
+ casCon.setCpeComponentDescriptor(cpeComponentDescriptor);
CasProcessorConfigurationParameterSettings consumerSettings = CpeDescriptorFactory
.produceCasProcessorConfigurationParameterSettings();
casCon.setConfigurationParameterSettings(consumerSettings);
consumerSettings.setParameterValue(InlineXmlCasConsumer.PARAM_OUTPUTDIR, outputDir
.getAbsolutePath());
- consumerSettings.setParameterValue(InlineXmlCasConsumer.PARAM_XCAS, Boolean
- .toString(xcasInput));
+ if (xcasInput) {
+ consumerSettings.setParameterValue(InlineXmlCasConsumer.PARAM_XCAS, "XCAS");
+ } else if (xmiInput) {
+ consumerSettings.setParameterValue(InlineXmlCasConsumer.PARAM_XCAS, "XMI");
+ }
casCon.setMaxErrorCount(0);
cpeDesc.addCasProcessor(casCon);
}
@@ -231,9 +251,9 @@
((Throwable) iter.next()).printStackTrace();
}
} else if (genProgressMessages) {
- // retreive the filename of the input file from the CAS
+ // retrieve the filename of the input file from the CAS
// (it was put there by the FileSystemCollectionReader)
- if (!xcasInput) {
+ if (!(xcasInput || xmiInput)) {
Type fileLocType = aCas.getTypeSystem().getType(
"org.apache.uima.examples.SourceDocumentInformation");
Feature fileNameFeat = fileLocType.getFeatureByBaseName("uri");
@@ -309,6 +329,10 @@
System.err.println("-s<x> (Stats level) - determines the verboseness of "
+ "performance statistics. s0=none, s1=brief, s2=full. The default is brief.");
System.err.println("-x - process input files as XCAS files.");
+ System.err.println("-xmi - process input files as XmiCas files.");
+ System.err.println("-lenient - ignore out-of-typesystem content when deserializing XML files.");
+ System.err.println("-l <ISO language> - specify the ISO Language code to set.");
+ System.err.println("-e <encoding> - specify the character encoding to use.");
}
@@ -355,6 +379,16 @@
} else if (arg.equals("-x")) // XCAS file input
{
xcasInput = true;
+ } else if (arg.equals("-xmi")) // XMI file input
+ {
+ xmiInput = true;
+ } else if (arg.equals("-lenient")) // lenient XML deserialization
+ {
+ xLenient = true;
+ } else if (arg.startsWith("-")) // invalid option
+ {
+ System.err.println(arg + " is not a valid option");
+ return false;
} else // one of the standard params - whichever we haven't read yet
{
if (aeSpecifierFile == null) {
Modified: incubator/uima/uimaj/trunk/uimaj-tools/src/main/java/org/apache/uima/tools/RunAE.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-tools/src/main/java/org/apache/uima/tools/RunAE.java?rev=813470&r1=813469&r2=813470&view=diff
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-tools/src/main/java/org/apache/uima/tools/RunAE.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-tools/src/main/java/org/apache/uima/tools/RunAE.java Thu Sep 10 15:06:37 2009
@@ -36,6 +36,7 @@
import org.apache.uima.collection.metadata.CasProcessorConfigurationParameterSettings;
import org.apache.uima.collection.metadata.CpeCasProcessor;
import org.apache.uima.collection.metadata.CpeCollectionReader;
+import org.apache.uima.collection.metadata.CpeComponentDescriptor;
import org.apache.uima.collection.metadata.CpeDescription;
import org.apache.uima.collection.metadata.CpeSofaMapping;
import org.apache.uima.collection.metadata.CpeSofaMappings;
@@ -64,7 +65,7 @@
* documents, that contains the text to be analyzed. The text will also be detagged. If this option
* is not specified, the entire document will be processed. <br>
* -l <ISO code> (Language) - specifies the ISO code for the language of the input documents.
- * Some AEs require this. <br>
+ * Some AEs (e.g. PersonTitleAnnotator) require this. <br>
* -e <Encoding> - specifies character encoding of the input documents. The default is UTF-8.
* <br>
* -q (Quiet) - suppresses progress messages that are normally printed as each document is processed.
@@ -143,7 +144,9 @@
CpeCasProcessor xmlDetaggerCasProc = null;
if (xmlTagName != null && xmlTagName.length() > 0) {
xmlDetaggerCasProc = CpeDescriptorFactory.produceCasProcessor("XmlDetagger");
- xmlDetaggerCasProc.setDescriptor(XmlDetagger.getDescriptorURL().toString());
+ CpeComponentDescriptor cpeComponentDescriptor =
+ CpeDescriptorFactory.produceComponentDescriptor(XmlDetagger.getDescriptorURL().toString());
+ xmlDetaggerCasProc.setCpeComponentDescriptor(cpeComponentDescriptor);
CasProcessorConfigurationParameterSettings detaggerSettings = CpeDescriptorFactory
.produceCasProcessorConfigurationParameterSettings();
xmlDetaggerCasProc.setConfigurationParameterSettings(detaggerSettings);
@@ -154,7 +157,9 @@
// add user's AE to CPE
CpeCasProcessor casProc = CpeDescriptorFactory.produceCasProcessor("UserAE");
- casProc.setDescriptor(aeSpecifierFile.getAbsolutePath());
+ CpeComponentDescriptor cpeComponentDescriptor =
+ CpeDescriptorFactory.produceComponentDescriptor(aeSpecifierFile.getAbsolutePath());
+ casProc.setCpeComponentDescriptor(cpeComponentDescriptor);
casProc.setMaxErrorCount(0);
cpeDesc.addCasProcessor(casProc);
@@ -163,7 +168,9 @@
CpeCasProcessor casCon = null;
if (outputDir != null) {
casCon = CpeDescriptorFactory.produceCasProcessor("CasConsumer");
- casCon.setDescriptor(InlineXmlCasConsumer.getDescriptorURL().toString());
+ cpeComponentDescriptor =
+ CpeDescriptorFactory.produceComponentDescriptor(InlineXmlCasConsumer.getDescriptorURL().toString());
+ casCon.setCpeComponentDescriptor(cpeComponentDescriptor);
CasProcessorConfigurationParameterSettings consumerSettings = CpeDescriptorFactory
.produceCasProcessorConfigurationParameterSettings();
casCon.setConfigurationParameterSettings(consumerSettings);
@@ -243,7 +250,7 @@
((Throwable) iter.next()).printStackTrace();
}
} else if (genProgressMessages) {
- // retreive the filename of the input file from the CAS
+ // retrieve the filename of the input file from the CAS
// (it was put there by the FileSystemCollectionReader)
if (!(xcasInput || xmiInput)) {
Type fileLocType = aCas.getTypeSystem().getType(