You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ea...@apache.org on 2013/09/30 15:33:33 UTC

svn commit: r1527553 - in /uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main: java/org/apache/uima/ducc/sampleapps/ resources/org/apache/uima/ducc/sampleapps/

Author: eae
Date: Mon Sep 30 13:33:32 2013
New Revision: 1527553

URL: http://svn.apache.org/r1527553
Log:
UIMA-3211

Added:
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCC.xml
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCM.xml
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobCasCR.xml

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java?rev=1527553&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java Mon Sep 30 13:33:32 2013
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ducc.sampleapps;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.impl.Serialization;
+import org.apache.uima.cas.impl.XmiCasSerializer;
+import org.apache.uima.ducc.Workitem;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.TypeSystemUtil;
+import org.apache.uima.util.XMLSerializer;
+
+/**
+ * Sample DUCC CAS consumer that collects the document CASes of a work item into one zip file.
+ * <p>
+ * A CAS that carries a {@link DuccDocumentInfo} is serialized (XMI, or compressed binary when
+ * <code>UseBinarySerialization</code> is true) and appended as entry
+ * <code>doc_&lt;docseq&gt;.&lt;ext&gt;</code> to a temporary file named
+ * <code>&lt;outputfile&gt;_temp</code>. A CAS that carries a {@link Workitem} marks the end of the
+ * work item: the zip is closed and the temporary file is renamed to its final name.
+ */
+public class DuccCasCC extends JCasAnnotator_ImplBase {
+
+  /** Name of the mandatory Integer parameter giving the zip compression level for XMI output. */
+  public static final String PARAM_XMICOMPRESSIONLEVEL = "XmiCompressionLevel";
+  /** Name of the optional Boolean parameter that selects compressed binary CAS output. */
+  public static final String PARAM_USEBINARYSERIALIZATION = "UseBinarySerialization";
+
+  private Logger logger;
+  /** Final name of the zip file currently being written; null while no output file is open. */
+  private String outputFilename=null;
+  /** Temporary file that receives the data until the work item is flushed. */
+  private File outFile;
+  private FileOutputStream fos;
+  private ZipOutputStream zos;
+  private boolean useBinaryCas;
+  private int zipCompLevel;
+  /** Extension of the zip entries: "cas" for binary output, "xmi" otherwise. */
+  private String casExt;
+
+
+  /**
+   * Reads the configuration parameters and selects the output format.
+   *
+   * @param aContext the UIMA context providing configuration parameters and the logger
+   * @throws ResourceInitializationException if the superclass initialization fails
+   */
+  @Override
+  public void initialize(UimaContext aContext) throws ResourceInitializationException {
+    super.initialize(aContext);
+    logger = aContext.getLogger();
+    zipCompLevel = (Integer)getContext().getConfigParameterValue(PARAM_XMICOMPRESSIONLEVEL);
+    Boolean binary = (Boolean) getContext().getConfigParameterValue(PARAM_USEBINARYSERIALIZATION);
+    useBinaryCas = (null != binary) && binary.booleanValue();
+    if (useBinaryCas) {
+      // binary entries are written with zip level 0, presumably because form 6 is already compressed
+      zipCompLevel = 0;
+      casExt = "cas";
+      logger.log(Level.INFO, "Outputting CASes in UIMA compressed binary form 6");
+    }
+    else {
+      casExt = "xmi";
+      logger.log(Level.INFO, "Outputting CASes in XmiCas format, zip compressed at level="+zipCompLevel);
+    }
+  }
+
+  /**
+   * Flushes the current output file if the CAS holds a {@link Workitem}; otherwise appends the
+   * CAS to the zip file named by its {@link DuccDocumentInfo}, opening that file first if needed.
+   *
+   * @param jcas the work item CAS or a document CAS
+   * @throws AnalysisEngineProcessException if the CAS has neither feature structure, if the
+   *           flushed work item does not match the open output file, or on any I/O or
+   *           serialization failure
+   */
+  @Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException {
+    Iterator<FeatureStructure> fsit = jcas.getIndexRepository().getAllIndexedFS(jcas.getCasType(Workitem.type));
+    if (fsit.hasNext()) {
+      flushOutput((Workitem) fsit.next());
+      return;
+    }
+
+    fsit = jcas.getIndexRepository().getAllIndexedFS(jcas.getCasType(DuccDocumentInfo.type));
+    if (!fsit.hasNext()) {
+      throw new AnalysisEngineProcessException(new RuntimeException("No DuccDocumentInfo FS in CAS"));
+    }
+    DuccDocumentInfo di = (DuccDocumentInfo) fsit.next();
+    String outputfile = di.getOutputfile();
+    if (null == outputfile) {
+      throw new AnalysisEngineProcessException(new RuntimeException("No output file set in DuccDocumentInfo"));
+    }
+    if (!outputfile.equals(outputFilename)) {
+      openOutput(outputfile, jcas);
+    }
+    writeCas(jcas, di);
+  }
+
+  /**
+   * Closes any output file that is still open when the component is destroyed. The temporary
+   * file is left in place; it is not renamed because its work item was never flushed.
+   */
+  @Override
+  public void destroy() {
+    abandonOutput();
+    super.destroy();
+  }
+
+  /** Closes the zip for the given work item and renames the temporary file to its final name. */
+  private void flushOutput(Workitem wi) throws AnalysisEngineProcessException {
+    String outputspec = wi.getOutputspec();
+    // outputFilename is null when no document of this work item has been received
+    if (null == outputFilename || !outputFilename.equals(outputspec)) {
+      throw new AnalysisEngineProcessException(new RuntimeException("flush mismatch: "+outputFilename+" != "+outputspec));
+    }
+    try {
+      closeOutputStreams();
+      if (!outFile.renameTo(new File(outputFilename))) {
+        throw new IOException("Rename failed for "+outputFilename);
+      }
+    } catch (IOException e) {
+      throw new AnalysisEngineProcessException(e);
+    } finally {
+      // forget the file so that a later work item with the same output name starts a new zip
+      outputFilename = null;
+      outFile = null;
+    }
+    logger.log(Level.INFO, "DuccCasCC: Flushed "+outputspec);
+  }
+
+  /** Creates the temporary zip file for outputfile; in binary mode also stores the type system. */
+  private void openOutput(String outputfile, JCas jcas) throws AnalysisEngineProcessException {
+    // a previous file is still open only if its work item was never flushed
+    abandonOutput();
+    try {
+      outFile = new File(outputfile+"_temp");
+      File outDir = outFile.getParentFile();
+      if (outDir != null && !outDir.exists()) {
+        outDir.mkdirs();
+      }
+      fos = new FileOutputStream(outFile);
+      zos = new ZipOutputStream(new BufferedOutputStream(fos,1024*100));
+      if (useBinaryCas) {
+        //put the output CAS typesystem in the output zipfile
+        ZipEntry ze = new ZipEntry("typesystem.xml");
+        ze.setMethod(ZipEntry.DEFLATED);
+        zos.setLevel(9);
+        zos.putNextEntry(ze);
+        TypeSystem ts = jcas.getTypeSystem();
+        TypeSystemDescription tsDesc = TypeSystemUtil.typeSystem2TypeSystemDescription(ts);
+        tsDesc.toXML(zos); // Capture type system in XML format
+        zos.closeEntry();
+      }
+      zos.setLevel(zipCompLevel);
+      // record the name only once the file is really open
+      outputFilename = outputfile;
+    } catch (Exception e) {
+      abandonOutput();
+      throw new AnalysisEngineProcessException(e);
+    }
+  }
+
+  /** Serializes the CAS into a new entry of the open zip file. */
+  private void writeCas(JCas jcas, DuccDocumentInfo di) throws AnalysisEngineProcessException {
+    ZipEntry ze = new ZipEntry("doc_"+di.getDocseq()+"."+casExt);
+    ze.setMethod(ZipEntry.DEFLATED);
+    try {
+      zos.putNextEntry(ze);
+      // flushed below but not closed, since closing it would close the zip stream
+      BufferedOutputStream bos = new BufferedOutputStream(zos,1024*10);
+      if (useBinaryCas) {
+        Serialization.serializeWithCompression(jcas.getCas(), bos, jcas.getTypeSystem());
+      }
+      else {
+        // write XMI
+        XmiCasSerializer ser = new XmiCasSerializer(jcas.getTypeSystem());
+        XMLSerializer xmlSer = new XMLSerializer(bos, false);
+        ser.serialize(jcas.getCas(), xmlSer.getContentHandler());
+      }
+      bos.flush();
+      zos.closeEntry();
+    } catch (Exception e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+  }
+
+  /** Closes an output file that will not be completed, logging instead of throwing. */
+  private void abandonOutput() {
+    if (zos != null || fos != null) {
+      logger.log(Level.WARNING, "DuccCasCC: closing incomplete output file "+outFile);
+      try {
+        closeOutputStreams();
+      } catch (IOException e) {
+        logger.log(Level.WARNING, "DuccCasCC: error closing "+outFile, e);
+      }
+    }
+    outputFilename = null;
+    outFile = null;
+  }
+
+  /** Closes the zip stream and the underlying file stream, even if the first close fails. */
+  private void closeOutputStreams() throws IOException {
+    try {
+      if (zos != null) {
+        zos.close();
+      }
+    } finally {
+      try {
+        if (fos != null) {
+          fos.close();
+        }
+      } finally {
+        zos = null;
+        fos = null;
+      }
+    }
+  }
+
+}

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java?rev=1527553&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java Mon Sep 30 13:33:32 2013
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ducc.sampleapps;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasMultiplier_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.impl.Serialization;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
+import org.apache.uima.ducc.Workitem;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.apache.uima.util.CasCreationUtils;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.XMLInputSource;
+import org.apache.uima.util.XMLParser;
+
+/**
+ * Sample DUCC CAS multiplier that reads the zip file named by the input {@link Workitem} and
+ * outputs one CAS for each serialized CAS entry in it.
+ * <p>
+ * If the first zip entry is <code>typesystem.xml</code> the remaining entries are read as binary
+ * CASes using that type system; otherwise all entries are read as XMI. Every output CAS gets a
+ * {@link DuccDocumentInfo} recording the input file, the output file and the sequence number of
+ * the document within the work item.
+ */
+public class DuccCasCM extends JCasMultiplier_ImplBase {
+  private static final String TYPESYSTEM_ENTRY = "typesystem.xml";
+
+  private String inputFileName;
+  private String outputFileName;
+  private FileInputStream fis;
+  /** Open input zip; null when no work item is being read. */
+  private ZipInputStream zis;
+  /** Entry the stream is positioned on; only meaningful while entryPending is true. */
+  private ZipEntry nextEntry;
+  /** True when nextEntry has been fetched by hasNext() but not yet consumed by next(). */
+  private boolean entryPending;
+  /** True once the first entry of the current zip has been inspected to select the format. */
+  private boolean formatDetected;
+  private Workitem wi;
+  private int docInWI;
+  private boolean readingXmiFormat;
+  private TypeSystem inputTS;
+  private Logger logger;
+
+  /**
+   * Reports whether the current input zip has another CAS entry. Repeated calls without an
+   * intervening {@link #next()} do not advance the stream. The input file is closed as soon as
+   * the end of the zip is reached or reading fails.
+   *
+   * @return true if {@link #next()} can deliver another CAS
+   * @throws AnalysisEngineProcessException if the zip or its type system cannot be read
+   */
+  public boolean hasNext() throws AnalysisEngineProcessException {
+    if (entryPending) {
+      return true;
+    }
+    if (null == zis) {
+      return false;
+    }
+    boolean advanced = false;
+    try {
+      nextEntry = zis.getNextEntry();
+      if (nextEntry != null && !formatDetected) {
+        // the first entry decides the format of the whole zip
+        formatDetected = true;
+        if (nextEntry.getName().equals(TYPESYSTEM_ENTRY)) {
+          readingXmiFormat = false;
+          getTypesystem();  // also moves nextEntry to the first CAS entry, if any
+        }
+        else {
+          readingXmiFormat = true;
+        }
+      }
+      advanced = true;
+    } catch (IOException e) {
+      throw new AnalysisEngineProcessException(e);
+    } finally {
+      if (!advanced || null == nextEntry) {
+        closeInputFile();
+      }
+    }
+    entryPending = (nextEntry != null);
+    return entryPending;
+  }
+
+  /**
+   * Deserializes the pending zip entry into a new CAS and stamps it with a
+   * {@link DuccDocumentInfo}.
+   *
+   * @return the new CAS
+   * @throws AnalysisEngineProcessException if there is no further entry, if a
+   *           <code>typesystem.xml</code> entry appears after the first entry, or if the entry
+   *           cannot be read or deserialized
+   */
+  public AbstractCas next() throws AnalysisEngineProcessException {
+    if (!hasNext()) {
+      throw new AnalysisEngineProcessException(new RuntimeException(
+          "No more entries in input zipfile "+inputFileName));
+    }
+    // the pending entry is consumed by this call whether or not it succeeds
+    entryPending = false;
+    if (nextEntry.getName().equals(TYPESYSTEM_ENTRY)) {
+      throw new AnalysisEngineProcessException(new RuntimeException(
+          "typesystem.xml entry found in the middle of input zipfile "+inputFileName));
+    }
+    JCas newcas = getEmptyJCas();
+    try {
+      ByteArrayInputStream bis = new ByteArrayInputStream(readCurrentEntry());
+      if (readingXmiFormat) {
+        XmiCasDeserializer.deserialize(bis, newcas.getCas());
+      }
+      else {
+        Serialization.deserializeCAS(newcas.getCas(), bis, inputTS, null);
+      }
+      Iterator<FeatureStructure> fsit = newcas.getIndexRepository().getAllIndexedFS(newcas.getCasType(DuccDocumentInfo.type));
+      DuccDocumentInfo di;
+      if (fsit.hasNext()) {
+        di = (DuccDocumentInfo) fsit.next();
+      }
+      else {
+        di = new DuccDocumentInfo(newcas);
+        di.addToIndexes();
+      }
+      di.setInputfile(inputFileName);
+      di.setOutputfile(outputFileName);
+      di.setDocseq(docInWI++);
+      return newcas;
+    } catch (Exception e) {
+      // a CAS that is not returned must be released
+      newcas.release();
+      throw new AnalysisEngineProcessException(e);
+    }
+  }
+
+  /**
+   * Starts a new work item by opening the zip file named by the {@link Workitem} in the CAS.
+   *
+   * @param jcas the work item CAS
+   * @throws AnalysisEngineProcessException if the CAS has no Workitem or the file cannot be opened
+   */
+  @Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException {
+    Iterator<FeatureStructure> fsit = jcas.getIndexRepository().getAllIndexedFS(jcas.getCasType(Workitem.type));
+    if (!fsit.hasNext()) {
+      throw new AnalysisEngineProcessException(new RuntimeException("No workitem FS in CAS"));
+    }
+    wi = (Workitem) fsit.next();
+    logger.log(Level.INFO, "DuccCasCM: inputs "+wi.getInputspec()+" outputs "+wi.getOutputspec());
+    try {
+      openInputFile(wi);
+    } catch (IOException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+  }
+
+
+  /**
+   * Stores the logger from the UIMA context.
+   *
+   * @param aContext the UIMA context
+   * @throws ResourceInitializationException if the superclass initialization fails
+   */
+  @Override
+  public void initialize(UimaContext aContext) throws ResourceInitializationException {
+    super.initialize(aContext);
+    logger = aContext.getLogger();
+  }
+
+
+  /** Closes an input file that is still open when the component is destroyed. */
+  @Override
+  public void destroy() {
+    closeInputFile();
+    super.destroy();
+  }
+
+
+  /** Opens the input zip of the work item and resets the per-work-item state. */
+  private void openInputFile(Workitem wi) throws IOException {
+    // the previous work item may not have been read to the end
+    closeInputFile();
+    inputFileName = wi.getInputspec();
+    outputFileName = wi.getOutputspec();
+    fis = new FileInputStream(new File(inputFileName));
+    zis = new ZipInputStream(new BufferedInputStream(fis,1024*100));
+    docInWI = 0;
+    formatDetected = false;
+  }
+
+
+  /** Closes the current input zip, if any; a failure to close is logged, not thrown. */
+  private void closeInputFile() {
+    entryPending = false;
+    try {
+      if (zis != null) {
+        zis.close();  // also closes the wrapped file stream
+      }
+      else if (fis != null) {
+        fis.close();
+      }
+    } catch (IOException e) {
+      logger.log(Level.WARNING, "DuccCasCM: error closing "+inputFileName, e);
+    } finally {
+      zis = null;
+      fis = null;
+    }
+  }
+
+
+  /** Reads the remaining bytes of the zip entry the stream is positioned on. */
+  private byte[] readCurrentEntry() throws IOException {
+    byte[] buff = new byte[10000];
+    int bytesread;
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    while (-1 != (bytesread = zis.read(buff))) {
+      baos.write(buff,0,bytesread);
+    }
+    return baos.toByteArray();
+  }
+
+
+  /**
+   * Parses the current zip entry as a type system description, keeps the resulting type system
+   * for binary deserialization and advances to the next zip entry.
+   */
+  private void getTypesystem() throws AnalysisEngineProcessException {
+    try {
+      ByteArrayInputStream bis = new ByteArrayInputStream(readCurrentEntry());
+      // Get XML parser from framework
+      XMLParser xmlParser = UIMAFramework.getXMLParser();
+      // Parse type system descriptor
+      TypeSystemDescription tsDesc = xmlParser.parseTypeSystemDescription(new XMLInputSource((InputStream)bis,null));
+      // Use type system description to create CAS and get the type system object
+      inputTS = CasCreationUtils.createCas(tsDesc, null, null).getTypeSystem();
+      // advance to first input CAS
+      nextEntry = zis.getNextEntry();
+    } catch (Exception e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+  }
+}

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java?rev=1527553&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java Mon Sep 30 13:33:32 2013
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ducc.sampleapps;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.collection.CollectionReader_ImplBase;
+import org.apache.uima.ducc.Workitem;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+
+/**
+ * A DUCC Job collection reader that creates one work item for each *.zip file found in an
+ * input directory. Each work item CAS carries a {@link Workitem} naming the input file and
+ * the corresponding output file.
+ * It can be configured with the following parameters:
+ * <ul>
+ * <li><code>InputSpec</code> - path to directory containing input *.zip files</li>
+ * <li><code>OutputDirectory</code> - path to base directory for output files</li>
+ * <li><code>IgnorePreviousOutput</code> (optional) - flag to process an input file even if its output file already exists</li>
+ * <li><code>SendToLast</code> (optional) - flag to route WorkItem CAS to last pipeline component. Only used for jobs with initial CM.</li>
+ * <li><code>SendToAll</code> (optional) - flag to route WorkItem CAS to all pipeline components. Only used for jobs with initial CM.</li>
+ * </ul>
+ * 
+ */
+public class DuccJobCasCR extends CollectionReader_ImplBase {
+  /**
+   * Name of configuration parameter that must be set to the path of a directory containing input
+   * files.
+   */
+  public static final String PARAM_INPUTSPEC = "InputSpec";
+
+  /**
+   * Name of configuration parameter that must be set to the path of the base directory 
+   * where output files will be created.
+   */
+  public static final String PARAM_OUTPUTDIR = "OutputDirectory";
+
+  /**
+   * Name of configuration parameter that indicates if previous output should be ignored.
+   */
+  public static final String PARAM_IGNOREPREVIOUS = "IgnorePreviousOutput";
+
+  /**
+   * Flag to route WorkItem CAS to last pipeline component. Used to flush any output data.
+   * This string is ignored if the Job has a CM component.
+   */
+  public static final String PARAM_SENDTOLAST = "SendToLast";
+
+  /**
+   * Flag to route WorkItem CAS to all pipeline components.
+   * If the Job has no CM component the WI CAS is already sent to AE and CC.
+   */
+  public static final String PARAM_SENDTOALL = "SendToAll";
+  
+  /** Pairs the path of one input file with the path of its output file. */
+  static class WorkItem {
+    public WorkItem(String absolutePathIn, String absolutePathOut) {
+      filename=absolutePathIn;
+      outname=absolutePathOut;
+    }
+    final String filename;
+    final String outname;
+  }
+
+  /** Work items still to be sent, in the order they were found. */
+  private ArrayList<WorkItem> mWorkList; 
+
+  private String mInputdirectory;
+
+  private String mOutputdirectory;
+  
+  private Boolean mIgnorePrevious;
+
+  /** Index into mWorkList of the next work item to send. */
+  private int mCurrentIndex;
+
+  private Boolean mSendToLast;
+
+  private Boolean mSendToAll;
+
+  /** Number of input files skipped because their output file already exists. */
+  private int mPreviouslyDone;
+
+  private Logger logger;
+
+  /**
+   * Reads the configuration parameters, validates the input and output directories and builds
+   * the list of work items.
+   *
+   * @throws ResourceInitializationException if the input path is not an existing directory or
+   *           the output path exists but is not a directory
+   * @see org.apache.uima.collection.CollectionReader_ImplBase#initialize()
+   */
+  @Override
+  public void initialize() throws ResourceInitializationException {
+    logger = getUimaContext().getLogger();
+    mInputdirectory = ((String) getConfigParameterValue(PARAM_INPUTSPEC)).trim();
+    mOutputdirectory = ((String) getConfigParameterValue(PARAM_OUTPUTDIR)).trim();
+    mIgnorePrevious = (Boolean) getConfigParameterValue(PARAM_IGNOREPREVIOUS);
+    mSendToLast = (Boolean) getConfigParameterValue(PARAM_SENDTOLAST);
+    mSendToAll = (Boolean) getConfigParameterValue(PARAM_SENDTOALL);
+
+    // the optional flags default to false
+    if (null == mIgnorePrevious) {
+      mIgnorePrevious = Boolean.FALSE;
+    }
+    if (null == mSendToLast) {
+      mSendToLast = Boolean.FALSE;
+    }
+    if (null == mSendToAll) {
+      mSendToAll = Boolean.FALSE;
+    }
+    mCurrentIndex = 0;
+    mPreviouslyDone = 0;
+
+    // if input directory does not exist or is not a directory, throw exception
+    File inDirectory = new File(mInputdirectory);
+    if (!inDirectory.exists() || !inDirectory.isDirectory()) {
+      throw new ResourceInitializationException(new RuntimeException( mInputdirectory+" is not a directory"));
+    }
+
+    // if output directory is a file throw exception
+    File outDirectory = new File(mOutputdirectory);
+    if (outDirectory.exists() && !outDirectory.isDirectory()) {
+      throw new ResourceInitializationException(new RuntimeException("Specified output directory "+mOutputdirectory+" is a file"));
+    }
+
+    logger.log(Level.INFO, "Processing *.zip files in "+mInputdirectory);
+    if (mIgnorePrevious) {
+      logger.log(Level.INFO, "Overwriting previous outfiles");
+    }
+
+    // get list of files in the specified directory
+    mWorkList = new ArrayList<WorkItem>();
+    addFilesFromDir(inDirectory, "zip");
+    if (0 < mPreviouslyDone) {
+      logger.log(Level.INFO, "Preserving "+mPreviouslyDone+" output files in "+mOutputdirectory);
+    }
+    logger.log(Level.INFO, "Processing "+mWorkList.size()+" output files in "+mOutputdirectory);
+  }
+  
+  /**
+   * This method adds files in the input directory,
+   * if the respective output file does not exist,
+   * or if mIgnorePrevious = true.
+   * Subdirectories are not searched. The output file has the same name as the input file and
+   * is located in the output directory.
+   * 
+   * @param dir the directory to scan
+   * @param ext the file name extension, without the dot, that input files must have
+   */
+  private void addFilesFromDir(File dir, String ext) {
+    File[] files = dir.listFiles();
+    if (null == files) {
+      // listFiles() returns null if the directory cannot be read
+      logger.log(Level.WARNING, "Unable to list files in "+dir);
+      return;
+    }
+    String suffix = "."+ext;
+    for (File file : files) {
+      if (file.isDirectory() || !file.getName().endsWith(suffix)) {
+        continue;
+      }
+      // derive the output name from the file name, so it does not depend on whether the
+      // configured input path is relative or ends with a separator
+      String outfilename = new File(mOutputdirectory, file.getName()).getPath();
+      if (!mIgnorePrevious && new File(outfilename).exists()) {
+        mPreviouslyDone++;
+        continue;
+      }
+      mWorkList.add(new WorkItem(file.getAbsolutePath(),outfilename));
+      logger.log(Level.FINE, "adding "+outfilename);
+    }
+  }
+
+  /**
+   * @see org.apache.uima.collection.CollectionReader#hasNext()
+   */
+  public boolean hasNext() {
+    return mCurrentIndex < mWorkList.size();
+  }
+
+  /**
+   * Fills the CAS with a {@link Workitem} describing the next input file and sets the input
+   * file path as the document text.
+   *
+   * @see org.apache.uima.collection.CollectionReader#getNext(org.apache.uima.cas.CAS)
+   */
+  public void getNext(CAS aCAS) throws IOException, CollectionException {
+    try {
+      JCas jcas = aCAS.getJCas();
+      WorkItem item = mWorkList.get(mCurrentIndex);
+      //create WorkItem info structure
+      Workitem wi = new Workitem(jcas);
+      wi.setInputspec(item.filename);
+      wi.setOutputspec(item.outname);
+      wi.setSendToLast(mSendToLast);
+      wi.setSendToAll(mSendToAll);
+      wi.addToIndexes();
+      logger.log(Level.INFO, "Sending "+wi.getInputspec());
+      mCurrentIndex++;
+      jcas.setDocumentText(wi.getInputspec());
+    } catch (CASException e) {
+      throw new CollectionException(e);
+    }
+  }
+
+  /**
+   * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#close()
+   */
+  public void close() throws IOException {
+  }
+
+  /**
+   * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#getProgress()
+   */
+  public Progress[] getProgress() {
+    return new Progress[] { new ProgressImpl(mCurrentIndex, mWorkList.size(), Progress.ENTITIES) };
+  }
+
+  /**
+   * Gets the total number of documents that will be returned by this collection reader. This is not
+   * part of the general collection reader interface.
+   * 
+   * @return the number of documents in the collection
+   */
+  public int getNumberOfDocuments() {
+    return mWorkList.size();
+  }
+
+}

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCC.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCC.xml?rev=1527553&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCC.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCC.xml Mon Sep 30 13:33:32 2013
@@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="UTF-8"?><analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+
+	<!--
+	 ***************************************************************
+	 * Licensed to the Apache Software Foundation (ASF) under one
+	 * or more contributor license agreements.  See the NOTICE file
+	 * distributed with this work for additional information
+	 * regarding copyright ownership.  The ASF licenses this file
+	 * to you under the Apache License, Version 2.0 (the
+	 * "License"); you may not use this file except in compliance
+	 * with the License.  You may obtain a copy of the License at
+         *
+	 *   http://www.apache.org/licenses/LICENSE-2.0
+	 * 
+	 * Unless required by applicable law or agreed to in writing,
+	 * software distributed under the License is distributed on an
+	 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+	 * KIND, either express or implied.  See the License for the
+	 * specific language governing permissions and limitations
+	 * under the License.
+	 ***************************************************************
+   -->
+   
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.uima.ducc.sampleapps.DuccCasCC</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>DuccCasCC</name>
+    <description/>
+    <version>1.0</version>
+    <vendor/>
+    <configurationParameters>
+      <configurationParameter>
+        <name>XmiCompressionLevel</name>
+        <type>Integer</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>UseBinarySerialization</name>
+        <description>Enable use of UIMA compressed binary CAS format</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>XmiCompressionLevel</name>
+        <value>
+          <integer>7</integer>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.ducc.common.uima.DuccJobFlowControlTS"/>
+        <import name="org.apache.uima.ducc.sampleapps.DuccDocumentInfoTS"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>
\ No newline at end of file

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCM.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCM.xml?rev=1527553&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCM.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCM.xml Mon Sep 30 13:33:32 2013
@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+	<!--
+	 ***************************************************************
+	 * Licensed to the Apache Software Foundation (ASF) under one
+	 * or more contributor license agreements.  See the NOTICE file
+	 * distributed with this work for additional information
+	 * regarding copyright ownership.  The ASF licenses this file
+	 * to you under the Apache License, Version 2.0 (the
+	 * "License"); you may not use this file except in compliance
+	 * with the License.  You may obtain a copy of the License at
+         *
+	 *   http://www.apache.org/licenses/LICENSE-2.0
+	 * 
+	 * Unless required by applicable law or agreed to in writing,
+	 * software distributed under the License is distributed on an
+	 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+	 * KIND, either express or implied.  See the License for the
+	 * specific language governing permissions and limitations
+	 * under the License.
+	 ***************************************************************
+   -->
+   
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.uima.ducc.sampleapps.DuccCasCM</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>DuccCasCM</name>
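+    <description>Reads the zip file named by the input Workitem and outputs one CAS for each serialized CAS entry it contains.</description>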
+    <version>1.0</version>
+    <vendor/>
+    <configurationParameters/>
+    <configurationParameterSettings/>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.ducc.common.uima.DuccJobFlowControlTS"/>
+        <import name="org.apache.uima.ducc.sampleapps.DuccDocumentInfoTS"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobCasCR.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobCasCR.xml?rev=1527553&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobCasCR.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobCasCR.xml Mon Sep 30 13:33:32 2013
@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="UTF-8"?><collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+
+	<!--
+	 ***************************************************************
+	 * Licensed to the Apache Software Foundation (ASF) under one
+	 * or more contributor license agreements.  See the NOTICE file
+	 * distributed with this work for additional information
+	 * regarding copyright ownership.  The ASF licenses this file
+	 * to you under the Apache License, Version 2.0 (the
+	 * "License"); you may not use this file except in compliance
+	 * with the License.  You may obtain a copy of the License at
+         *
+	 *   http://www.apache.org/licenses/LICENSE-2.0
+	 * 
+	 * Unless required by applicable law or agreed to in writing,
+	 * software distributed under the License is distributed on an
+	 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+	 * KIND, either express or implied.  See the License for the
+	 * specific language governing permissions and limitations
+	 * under the License.
+	 ***************************************************************
+   -->
+   
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>org.apache.uima.ducc.sampleapps.DuccJobCasCR</implementationName>
+  <processingResourceMetaData>
+    <name>DuccJobCasCR</name>
+    <description>Generates CASes with reference to input.</description>
+    <version>1.0</version>
+    <vendor>Apache UIMA</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>InputSpec</name>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>OutputDirectory</name>
+        <description>The base output directory</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>IgnorePreviousOutput</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>SendToLast</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>SendToAll</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>InputSpec</name>
+        <value>
+          <string>/tmp</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>OutputDirectory</name>
+        <value>
+          <string>/tmp</string>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.ducc.common.uima.DuccJobFlowControlTS"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+</collectionReaderDescription>
\ No newline at end of file