You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ea...@apache.org on 2013/09/30 15:36:23 UTC
svn commit: r1527558 - in
/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main:
java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java
resources/org/apache/uima/ducc/sampleapps/DuccDocumentInfoTS.xml
Author: eae
Date: Mon Sep 30 13:36:23 2013
New Revision: 1527558
URL: http://svn.apache.org/r1527558
Log:
UIMA-3149
Modified:
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccDocumentInfoTS.xml
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java?rev=1527558&r1=1527557&r2=1527558&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java Mon Sep 30 13:36:23 2013
@@ -42,9 +42,12 @@ import org.apache.uima.util.ProgressImpl
* <ul>
* <li><code>InputDirectory</code> - path to directory containing input files</li>
* <li><code>OutputDirectory</code> - path to directory for output files</li>
+ * <li><code>IgnorePreviousOutput</code> (optional) - flag to ignore previous output files</li>
* <li><code>Encoding</code> (optional) - character encoding of the input files</li>
* <li><code>Language</code> (optional) - language of the input documents</li>
* <li><code>BlockSize</code> (optional) - Block size used to process input files</li>
+ * <li><code>SendToLast</code> (optional) - flag to route WorkItem CAS to last pipeline component. Only used for jobs with initial CM.</li>
+ * <li><code>SendToAll</code> (optional) - flag to route WorkItem CAS to all pipeline components. Only used for jobs with initial CM.</li>
* </ul>
*
*/
@@ -86,13 +89,16 @@ public class DuccJobTextCR extends Colle
public static final String PARAM_BLOCKSIZE = "BlockSize";
/**
- * Name of configuration parameter specifying the block size used to break input files into work-items.
- * Output files will correspond to the input data found in each block.
- * If not specified, the entire file will be processed as a single work-item.
+ * Flag to route WorkItem CAS to last pipeline component. Used to flush any output data.
+ * This flag is ignored if the Job has no CM component.
*/
public static final String PARAM_SENDTOLAST = "SendToLast";
- public static final String PARAM_DEBUG = "Debug";
+ /**
+ * Flag to route WorkItem CAS to all pipeline components.
+ * If the Job has no CM component the WI CAS is already sent to AE and CC.
+ */
+ public static final String PARAM_SENDTOALL = "SendToAll";
class WorkItem {
public WorkItem(String absolutePathIn, String absolutePathOut, int i, long len, long off, boolean end) {
@@ -129,7 +135,7 @@ public class DuccJobTextCR extends Colle
private Boolean mSendToLast;
- private Boolean mDebug;
+ private Boolean mSendToAll;
private int mPreviouslyDone;
@@ -146,7 +152,7 @@ public class DuccJobTextCR extends Colle
mEncoding = (String) getConfigParameterValue(PARAM_ENCODING);
mLanguage = (String) getConfigParameterValue(PARAM_LANGUAGE);
mSendToLast = (Boolean) getConfigParameterValue(PARAM_SENDTOLAST);
- mDebug = (Boolean) getConfigParameterValue(PARAM_DEBUG);
+ mSendToAll = (Boolean) getConfigParameterValue(PARAM_SENDTOALL);
if (null == mIgnorePrevious) {
mIgnorePrevious = Boolean.FALSE;
@@ -154,8 +160,8 @@ public class DuccJobTextCR extends Colle
if (null == mSendToLast) {
mSendToLast = Boolean.FALSE;
}
- if (null == mDebug) {
- mDebug = Boolean.FALSE;
+ if (null == mSendToAll) {
+ mSendToAll = Boolean.FALSE;
}
mCurrentIndex = 0;
mPreviouslyDone = 0;
@@ -167,14 +173,11 @@ public class DuccJobTextCR extends Colle
new Object[] { PARAM_INPUTDIR, this.getMetaData().getName(), inDirectory.getPath() });
}
- // if output directory does not exist or is not a directory, throw exception
+ // if output directory is a file throw exception
File outDirectory = new File(mOutputdirectory);
if (outDirectory.exists() && !outDirectory.isDirectory()) {
throw new ResourceInitializationException(new RuntimeException("Specified output directory "+mOutputdirectory+" is a file"));
}
- if (!outDirectory.exists()) {
- mIgnorePrevious = true;
- }
mBlocksize = 0;
logger.log(Level.INFO, "Processing input files from "+mInputdirectory);
@@ -183,11 +186,10 @@ public class DuccJobTextCR extends Colle
logger.log(Level.INFO, "Using blocksize "+ mBlocksize);
}
if (null != mIgnorePrevious && mIgnorePrevious) {
-// mIgnorePrevious = Boolean.FALSE;
logger.log(Level.INFO, "Overwriting previous outfiles");
}
- // get list of files or file-parts in the specified directory, and subdirectories if recursive
+ // get list of files or file-parts in the specified directory
mWorkList = new ArrayList<WorkItem>();
addFilesFromDir(inDirectory);
if (0 < mPreviouslyDone) {
@@ -211,12 +213,12 @@ public class DuccJobTextCR extends Colle
outfilename = outfilename.substring(mInputdirectory.length());
outfilename = mOutputdirectory+outfilename;
if (mBlocksize == 0) {
- File outFile = new File(outfilename+".processed");
+ File outFile = new File(outfilename+"_processed.zip");
if (!mIgnorePrevious && outFile.exists()) {
mPreviouslyDone++;
}
if (mIgnorePrevious || !outFile.exists()) {
- mWorkList.add(new WorkItem(files[i].getAbsolutePath(),outfilename+".processed",0,files[i].length(),0,true));
+ mWorkList.add(new WorkItem(files[i].getAbsolutePath(),outfilename+"_processed.zip",0,files[i].length(),0,true));
logger.log(Level.FINE, "adding "+outfilename);
}
}
@@ -228,12 +230,12 @@ public class DuccJobTextCR extends Colle
while (fsize > 0) {
String outfilechunk = outfilename+"_"+j;
long length = (fsize < mBlocksize) ? fsize : mBlocksize;
- File outFile = new File(outfilechunk+".processed");
+ File outFile = new File(outfilechunk+"_processed.zip");
if (!mIgnorePrevious && outFile.exists()) {
mPreviouslyDone++;
}
if (mIgnorePrevious || !outFile.exists()) {
- mWorkList.add(new WorkItem(files[i].getAbsolutePath(),outfilechunk+".processed",j,length,offset,fsize==length));
+ mWorkList.add(new WorkItem(files[i].getAbsolutePath(),outfilechunk+"_processed.zip",j,length,offset,fsize==length));
logger.log(Level.FINE, "adding "+outfilechunk);
}
j++;
@@ -272,6 +274,7 @@ public class DuccJobTextCR extends Colle
wi.setLanguage(mLanguage);
}
wi.setSendToLast(mSendToLast);
+ wi.setSendToAll(mSendToAll);
wi.addToIndexes();
wi.setLastBlock(mWorkList.get(mCurrentIndex).last);
logger.log(Level.INFO, "Sending "+wi.getInputspec()+" index="+wi.getBlockindex()+" last="+wi.getLastBlock()+" length="+wi.getBytelength());
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccDocumentInfoTS.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccDocumentInfoTS.xml?rev=1527558&r1=1527557&r2=1527558&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccDocumentInfoTS.xml (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccDocumentInfoTS.xml Mon Sep 30 13:36:23 2013
@@ -1,4 +1,26 @@
<?xml version="1.0" encoding="UTF-8"?><typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+
+ <!--
+ ***************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ***************************************************************
+ -->
+
<name>Ducc Document Info</name>
<description>Type for communication between CR, CM, FC and CC</description>
<version>1.0</version>