You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ea...@apache.org on 2013/08/05 23:34:40 UTC

svn commit: r1510744 - in /uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main: java/org/apache/uima/ducc/sampleapps/ resources/org/apache/uima/ducc/sampleapps/

Author: eae
Date: Mon Aug  5 21:34:40 2013
New Revision: 1510744

URL: http://svn.apache.org/r1510744
Log:
UIMA-3149 First part of the text sample app

Added:
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccDocumentInfo.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccDocumentInfo_Type.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCC.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccDocumentInfoTS.xml
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobTextCR.xml
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccTextCC.xml
    uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccTextCM.xml

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccDocumentInfo.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccDocumentInfo.java?rev=1510744&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccDocumentInfo.java (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccDocumentInfo.java Mon Aug  5 21:34:40 2013
@@ -0,0 +1,126 @@
+
+
+/* First created by JCasGen Wed Jul 31 15:14:59 EDT 2013 */
+package org.apache.uima.ducc.sampleapps;
+
+import org.apache.uima.jcas.JCas; 
+import org.apache.uima.jcas.JCasRegistry;
+import org.apache.uima.jcas.cas.TOP_Type;
+
+import org.apache.uima.jcas.cas.TOP;
+
+
+/** 
+ * JCas class for the type <code>org.apache.uima.ducc.sampleapps.DuccDocumentInfo</code>.
+ * It extends {@link TOP} and exposes four features through typed accessors:
+ * <code>inputfile</code> and <code>outputfile</code> (String values), and
+ * <code>docseq</code> and <code>byteoffset</code> (int values).
+ * Every accessor delegates to the low-level CAS using the feature codes held by
+ * {@link DuccDocumentInfo_Type}; when <code>DuccDocumentInfo_Type.featOkTst</code> is true and
+ * the corresponding feature object is null, <code>throwFeatMissing</code> is called first.
+ * Updated by JCasGen Thu Aug 01 14:48:37 EDT 2013
+ * XML source: /users1/eae/workspace-ducc/uima-ducc/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccDocumentInfoTS.xml
+ * @generated */
+public class DuccDocumentInfo extends TOP {
+  /** Index obtained by registering this class with the {@link JCasRegistry}.
+   * @generated
+   * @ordered 
+   */
+  public final static int typeIndexID = JCasRegistry.register(DuccDocumentInfo.class);
+  /** Alias of {@link #typeIndexID}.
+   * @generated
+   * @ordered 
+   */
+  public final static int type = typeIndexID;
+  /** Returns {@link #typeIndexID}.
+   * @generated  */
+  public              int getTypeIndexID() {return typeIndexID;}
+ 
+  /** Never called.  Disable default constructor
+   * @generated */
+  protected DuccDocumentInfo() {}
+    
+  /** Internal - constructor used by generator 
+   * @param addr low-level CAS address passed to the superclass
+   * @param type the type object passed to the superclass
+   * @generated */
+  public DuccDocumentInfo(int addr, TOP_Type type) {
+    super(addr, type);
+    readObject();
+  }
+  
+  /** Creates an instance in the given JCas.
+   * @param jcas the JCas passed to the superclass constructor
+   * @generated */
+  public DuccDocumentInfo(JCas jcas) {
+    super(jcas);
+    readObject();   
+  } 
+
+  /** <!-- begin-user-doc -->
+    * Write your own initialization here.
+    * Called at the end of both public constructors; currently does nothing.
+    * <!-- end-user-doc -->
+  @generated modifiable */
+  private void readObject() {}
+     
+ 
+    
+  //*--------------*
+  //* Feature: inputfile
+
+  /** getter for inputfile - gets the String value of the inputfile feature
+   * @return the value read from the CAS for this feature structure
+   * @generated */
+  public String getInputfile() {
+    if (DuccDocumentInfo_Type.featOkTst && ((DuccDocumentInfo_Type)jcasType).casFeat_inputfile == null)
+      jcasType.jcas.throwFeatMissing("inputfile", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    return jcasType.ll_cas.ll_getStringValue(addr, ((DuccDocumentInfo_Type)jcasType).casFeatCode_inputfile);}
+    
+  /** setter for inputfile - sets the String value of the inputfile feature
+   * @param v the value to store in the CAS for this feature structure
+   * @generated */
+  public void setInputfile(String v) {
+    if (DuccDocumentInfo_Type.featOkTst && ((DuccDocumentInfo_Type)jcasType).casFeat_inputfile == null)
+      jcasType.jcas.throwFeatMissing("inputfile", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    jcasType.ll_cas.ll_setStringValue(addr, ((DuccDocumentInfo_Type)jcasType).casFeatCode_inputfile, v);}    
+   
+    
+  //*--------------*
+  //* Feature: outputfile
+
+  /** getter for outputfile - gets the String value of the outputfile feature
+   * @return the value read from the CAS for this feature structure
+   * @generated */
+  public String getOutputfile() {
+    if (DuccDocumentInfo_Type.featOkTst && ((DuccDocumentInfo_Type)jcasType).casFeat_outputfile == null)
+      jcasType.jcas.throwFeatMissing("outputfile", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    return jcasType.ll_cas.ll_getStringValue(addr, ((DuccDocumentInfo_Type)jcasType).casFeatCode_outputfile);}
+    
+  /** setter for outputfile - sets the String value of the outputfile feature
+   * @param v the value to store in the CAS for this feature structure
+   * @generated */
+  public void setOutputfile(String v) {
+    if (DuccDocumentInfo_Type.featOkTst && ((DuccDocumentInfo_Type)jcasType).casFeat_outputfile == null)
+      jcasType.jcas.throwFeatMissing("outputfile", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    jcasType.ll_cas.ll_setStringValue(addr, ((DuccDocumentInfo_Type)jcasType).casFeatCode_outputfile, v);}    
+   
+    
+  //*--------------*
+  //* Feature: docseq
+
+  /** getter for docseq - gets document sequence within work item
+   * @return the int value read from the CAS for this feature structure
+   * @generated */
+  public int getDocseq() {
+    if (DuccDocumentInfo_Type.featOkTst && ((DuccDocumentInfo_Type)jcasType).casFeat_docseq == null)
+      jcasType.jcas.throwFeatMissing("docseq", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    return jcasType.ll_cas.ll_getIntValue(addr, ((DuccDocumentInfo_Type)jcasType).casFeatCode_docseq);}
+    
+  /** setter for docseq - sets document sequence within work item 
+   * @param v the int value to store in the CAS for this feature structure
+   * @generated */
+  public void setDocseq(int v) {
+    if (DuccDocumentInfo_Type.featOkTst && ((DuccDocumentInfo_Type)jcasType).casFeat_docseq == null)
+      jcasType.jcas.throwFeatMissing("docseq", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    jcasType.ll_cas.ll_setIntValue(addr, ((DuccDocumentInfo_Type)jcasType).casFeatCode_docseq, v);}    
+   
+    
+  //*--------------*
+  //* Feature: byteoffset
+
+  /** getter for byteoffset - gets offset of byte location of first character in document
+   * @return the int value read from the CAS for this feature structure
+   * @generated */
+  public int getByteoffset() {
+    if (DuccDocumentInfo_Type.featOkTst && ((DuccDocumentInfo_Type)jcasType).casFeat_byteoffset == null)
+      jcasType.jcas.throwFeatMissing("byteoffset", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    return jcasType.ll_cas.ll_getIntValue(addr, ((DuccDocumentInfo_Type)jcasType).casFeatCode_byteoffset);}
+    
+  /** setter for byteoffset - sets offset of byte location of first character in document 
+   * @param v the int value to store in the CAS for this feature structure
+   * @generated */
+  public void setByteoffset(int v) {
+    if (DuccDocumentInfo_Type.featOkTst && ((DuccDocumentInfo_Type)jcasType).casFeat_byteoffset == null)
+      jcasType.jcas.throwFeatMissing("byteoffset", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    jcasType.ll_cas.ll_setIntValue(addr, ((DuccDocumentInfo_Type)jcasType).casFeatCode_byteoffset, v);}    
+  }
+
+    
\ No newline at end of file

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccDocumentInfo_Type.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccDocumentInfo_Type.java?rev=1510744&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccDocumentInfo_Type.java (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccDocumentInfo_Type.java Mon Aug  5 21:34:40 2013
@@ -0,0 +1,145 @@
+
+/* First created by JCasGen Wed Jul 31 15:14:59 EDT 2013 */
+package org.apache.uima.ducc.sampleapps;
+
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.JCasRegistry;
+import org.apache.uima.cas.impl.CASImpl;
+import org.apache.uima.cas.impl.FSGenerator;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.impl.TypeImpl;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.impl.FeatureImpl;
+import org.apache.uima.cas.Feature;
+import org.apache.uima.jcas.cas.TOP_Type;
+
+/** 
+ * Companion "_Type" class for {@link DuccDocumentInfo}.
+ * It holds the {@link Feature} object and integer feature code for each of the four features
+ * (inputfile, outputfile, docseq, byteoffset), offers address-based getters and setters for
+ * them, and supplies the {@link FSGenerator} that creates {@link DuccDocumentInfo} instances.
+ * Updated by JCasGen Thu Aug 01 14:48:37 EDT 2013
+ * @generated */
+public class DuccDocumentInfo_Type extends TOP_Type {
+  /** Returns the generator held in {@link #fsGenerator}.
+   * @generated */
+  protected FSGenerator getFSGenerator() {return fsGenerator;}
+  /** Generator that produces a {@link DuccDocumentInfo} for a CAS address. When
+   * <code>useExistingInstance</code> is set, an instance already recorded for the address is
+   * returned, and a newly created one is recorded before being returned; otherwise a new
+   * instance is created on every call.
+   * @generated */
+  private final FSGenerator fsGenerator = 
+    new FSGenerator() {
+      public FeatureStructure createFS(int addr, CASImpl cas) {
+  			 if (DuccDocumentInfo_Type.this.useExistingInstance) {
+  			   // Return eq fs instance if already created
+  		     FeatureStructure fs = DuccDocumentInfo_Type.this.jcas.getJfsFromCaddr(addr);
+  		     if (null == fs) {
+  		       fs = new DuccDocumentInfo(addr, DuccDocumentInfo_Type.this);
+  			   DuccDocumentInfo_Type.this.jcas.putJfsFromCaddr(addr, fs);
+  			   return fs;
+  		     }
+  		     return fs;
+        } else return new DuccDocumentInfo(addr, DuccDocumentInfo_Type.this);
+  	  }
+    };
+  /** Same value as {@link DuccDocumentInfo#typeIndexID}.
+   * @generated */
+  public final static int typeIndexID = DuccDocumentInfo.typeIndexID;
+  /** Flag obtained from the {@link JCasRegistry}; when true, every accessor checks that its
+   * feature object is non-null before touching the CAS.
+   * @generated 
+     @modifiable */
+  public final static boolean featOkTst = JCasRegistry.getFeatOkTst("org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+ 
+  /** Feature object for "inputfile", assigned in the constructor.
+   * @generated */
+  final Feature casFeat_inputfile;
+  /** Feature code for "inputfile", or JCas.INVALID_FEATURE_CODE when the feature object is null.
+   * @generated */
+  final int     casFeatCode_inputfile;
+  /** Reads the inputfile String of the feature structure at the given address.
+   * @generated */ 
+  public String getInputfile(int addr) {
+        if (featOkTst && casFeat_inputfile == null)
+      jcas.throwFeatMissing("inputfile", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    return ll_cas.ll_getStringValue(addr, casFeatCode_inputfile);
+  }
+  /** Writes the inputfile String of the feature structure at the given address.
+   * @generated */    
+  public void setInputfile(int addr, String v) {
+        if (featOkTst && casFeat_inputfile == null)
+      jcas.throwFeatMissing("inputfile", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    ll_cas.ll_setStringValue(addr, casFeatCode_inputfile, v);}
+    
+  
+ 
+  /** Feature object for "outputfile", assigned in the constructor.
+   * @generated */
+  final Feature casFeat_outputfile;
+  /** Feature code for "outputfile", or JCas.INVALID_FEATURE_CODE when the feature object is null.
+   * @generated */
+  final int     casFeatCode_outputfile;
+  /** Reads the outputfile String of the feature structure at the given address.
+   * @generated */ 
+  public String getOutputfile(int addr) {
+        if (featOkTst && casFeat_outputfile == null)
+      jcas.throwFeatMissing("outputfile", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    return ll_cas.ll_getStringValue(addr, casFeatCode_outputfile);
+  }
+  /** Writes the outputfile String of the feature structure at the given address.
+   * @generated */    
+  public void setOutputfile(int addr, String v) {
+        if (featOkTst && casFeat_outputfile == null)
+      jcas.throwFeatMissing("outputfile", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    ll_cas.ll_setStringValue(addr, casFeatCode_outputfile, v);}
+    
+  
+ 
+  /** Feature object for "docseq", assigned in the constructor.
+   * @generated */
+  final Feature casFeat_docseq;
+  /** Feature code for "docseq", or JCas.INVALID_FEATURE_CODE when the feature object is null.
+   * @generated */
+  final int     casFeatCode_docseq;
+  /** Reads the docseq int of the feature structure at the given address.
+   * @generated */ 
+  public int getDocseq(int addr) {
+        if (featOkTst && casFeat_docseq == null)
+      jcas.throwFeatMissing("docseq", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    return ll_cas.ll_getIntValue(addr, casFeatCode_docseq);
+  }
+  /** Writes the docseq int of the feature structure at the given address.
+   * @generated */    
+  public void setDocseq(int addr, int v) {
+        if (featOkTst && casFeat_docseq == null)
+      jcas.throwFeatMissing("docseq", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    ll_cas.ll_setIntValue(addr, casFeatCode_docseq, v);}
+    
+  
+ 
+  /** Feature object for "byteoffset", assigned in the constructor.
+   * @generated */
+  final Feature casFeat_byteoffset;
+  /** Feature code for "byteoffset", or JCas.INVALID_FEATURE_CODE when the feature object is null.
+   * @generated */
+  final int     casFeatCode_byteoffset;
+  /** Reads the byteoffset int of the feature structure at the given address.
+   * @generated */ 
+  public int getByteoffset(int addr) {
+        if (featOkTst && casFeat_byteoffset == null)
+      jcas.throwFeatMissing("byteoffset", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    return ll_cas.ll_getIntValue(addr, casFeatCode_byteoffset);
+  }
+  /** Writes the byteoffset int of the feature structure at the given address.
+   * @generated */    
+  public void setByteoffset(int addr, int v) {
+        if (featOkTst && casFeat_byteoffset == null)
+      jcas.throwFeatMissing("byteoffset", "org.apache.uima.ducc.sampleapps.DuccDocumentInfo");
+    ll_cas.ll_setIntValue(addr, casFeatCode_byteoffset, v);}
+    
+  
+
+
+
+  /** initialize variables to correspond with Cas Type and Features.
+	* Registers the FS generator for this type, then looks up each of the four features
+	* ("inputfile" and "outputfile" with range uima.cas.String, "docseq" and "byteoffset" with
+	* range uima.cas.Integer) and stores its feature code, falling back to
+	* JCas.INVALID_FEATURE_CODE when the lookup returned null.
+	* @generated */
+  public DuccDocumentInfo_Type(JCas jcas, Type casType) {
+    super(jcas, casType);
+    casImpl.getFSClassRegistry().addGeneratorForType((TypeImpl)this.casType, getFSGenerator());
+
+ 
+    casFeat_inputfile = jcas.getRequiredFeatureDE(casType, "inputfile", "uima.cas.String", featOkTst);
+    casFeatCode_inputfile  = (null == casFeat_inputfile) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_inputfile).getCode();
+
+ 
+    casFeat_outputfile = jcas.getRequiredFeatureDE(casType, "outputfile", "uima.cas.String", featOkTst);
+    casFeatCode_outputfile  = (null == casFeat_outputfile) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_outputfile).getCode();
+
+ 
+    casFeat_docseq = jcas.getRequiredFeatureDE(casType, "docseq", "uima.cas.Integer", featOkTst);
+    casFeatCode_docseq  = (null == casFeat_docseq) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_docseq).getCode();
+
+ 
+    casFeat_byteoffset = jcas.getRequiredFeatureDE(casType, "byteoffset", "uima.cas.Integer", featOkTst);
+    casFeatCode_byteoffset  = (null == casFeat_byteoffset) ? JCas.INVALID_FEATURE_CODE : ((FeatureImpl)casFeat_byteoffset).getCode();
+
+  }
+}
+
+
+
+    
\ No newline at end of file

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java?rev=1510744&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java Mon Aug  5 21:34:40 2013
@@ -0,0 +1,310 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ducc.sampleapps;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.collection.CollectionReader_ImplBase;
+import org.apache.uima.ducc.Workitem;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceConfigurationException;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+
+/**
+ * A simple DUCC Job collection reader that reads text files from a directory in the filesystem.
+ * It can be configured with the following parameters:
+ * <ul>
+ * <li><code>InputDirectory</code> - path to directory containing input files</li>
+ * <li><code>OutputDirectory</code> - path to directory for output files</li>
+ * <li><code>Encoding</code> (optional) - character encoding of the input files</li>
+ * <li><code>Language</code> (optional) - language of the input documents</li>
+ * <li><code>BlockSize</code> (optional) - Block size used to process input files</li>
+ * </ul>
+ * 
+ */
+public class DuccJobTextCR extends CollectionReader_ImplBase {
+  /**
+   * Name of configuration parameter that must be set to the path of a directory containing input
+   * files.
+   */
+  public static final String PARAM_INPUTDIR = "InputDirectory";
+
+  /**
+   * Name of configuration parameter that must be set to the path of the base directory 
+   * where output files will be created.
+   */
+  public static final String PARAM_OUTPUTDIR = "OutputDirectory";
+
+  /**
+   * Name of configuration parameter that indicates if previous output should be ignored.
+   */
+  public static final String PARAM_IGNOREPREVIOUS = "IgnorePreviousOutput";
+
+  /**
+   * Name of configuration parameter that contains the character encoding used by the input files.
+   * If not specified, the default system encoding will be used.
+   */
+  public static final String PARAM_ENCODING = "Encoding";
+
+  /**
+   * Name of optional configuration parameter that contains the language of the documents in the
+   * input directory. If specified this information will be added to the CAS.
+   */
+  public static final String PARAM_LANGUAGE = "Language";
+
+  /**
+   * Name of configuration parameter specifying the block size used to break input files into work-items.
+   * Output files will correspond to the input data found in each block.
+   * If not specified, the entire file will be processed as a single work-item.
+   */
+  public static final String PARAM_BLOCKSIZE = "BlockSize";
+
+  /**
+   * Name of configuration parameter specifying the block size used to break input files into work-items.
+   * Output files will correspond to the input data found in each block.
+   * If not specified, the entire file will be processed as a single work-item.
+   */
+  public static final String PARAM_SENDTOLAST = "SendToLast";
+
+  public static final String PARAM_DEBUG = "Debug";
+  
+  class WorkItem {
+    public WorkItem(String absolutePathIn, String absolutePathOut, int i, long len, long off, boolean end) {
+      filename=absolutePathIn;
+      outname=absolutePathOut;
+      index=i;
+      length=(int)len;
+      offset=(int)off;
+      last=end;
+    }
+    String filename;
+    String outname;
+    int index;
+    int offset;
+    int length;
+    boolean last;
+  }
+
+  private ArrayList<WorkItem> mWorkList; 
+
+  private String mInputdirectory;
+
+  private String mOutputdirectory;
+  
+  private Boolean mIgnorePrevious;
+
+  private String mEncoding;
+
+  private String mLanguage;
+
+  private int mBlocksize;
+
+  private int mCurrentIndex;
+
+  private Boolean mSendToLast;
+
+  private Boolean mDebug;
+
+  private int mPreviouslyDone;
+
+  private Logger logger;
+
+  /**
+   * @see org.apache.uima.collection.CollectionReader_ImplBase#initialize()
+   */
+  public void initialize() throws ResourceInitializationException {
+	logger = getUimaContext().getLogger();
+    mInputdirectory = ((String) getConfigParameterValue(PARAM_INPUTDIR)).trim();
+    mOutputdirectory = ((String) getConfigParameterValue(PARAM_OUTPUTDIR)).trim();
+    mIgnorePrevious = (Boolean) getConfigParameterValue(PARAM_IGNOREPREVIOUS);
+    mEncoding  = (String) getConfigParameterValue(PARAM_ENCODING);
+    mLanguage  = (String) getConfigParameterValue(PARAM_LANGUAGE);
+    mSendToLast = (Boolean) getConfigParameterValue(PARAM_SENDTOLAST);
+    mDebug = (Boolean) getConfigParameterValue(PARAM_DEBUG);
+
+    if (null == mIgnorePrevious) {
+    	mIgnorePrevious = Boolean.FALSE;
+    }
+    if (null == mSendToLast) {
+    	mSendToLast = Boolean.FALSE;
+    }
+    if (null == mDebug) {
+    	mDebug = Boolean.FALSE;
+    }
+    mCurrentIndex = 0;
+    mPreviouslyDone = 0;
+
+    // if input directory does not exist or is not a directory, throw exception
+    File inDirectory = new File(mInputdirectory);
+    if (!inDirectory.exists() || !inDirectory.isDirectory()) {
+      throw new ResourceInitializationException(ResourceConfigurationException.DIRECTORY_NOT_FOUND,
+              new Object[] { PARAM_INPUTDIR, this.getMetaData().getName(), inDirectory.getPath() });
+    }
+
+    // if output directory does not exist or is not a directory, throw exception
+    File outDirectory = new File(mOutputdirectory);
+    if (outDirectory.exists() && !outDirectory.isDirectory()) {
+      throw new ResourceInitializationException(new RuntimeException("Specified output directory "+mOutputdirectory+" is a file"));
+    }
+    if (!outDirectory.exists()) {
+    	mIgnorePrevious = true;
+    }
+
+    mBlocksize = 0;
+    logger.log(Level.INFO, "Processing input files from "+mInputdirectory);
+    if (null != getConfigParameterValue(PARAM_BLOCKSIZE)) {
+      mBlocksize  = (Integer) getConfigParameterValue(PARAM_BLOCKSIZE);
+      logger.log(Level.INFO, "Using blocksize "+ mBlocksize);
+    }
+    if (null != mIgnorePrevious && mIgnorePrevious) {
+//      mIgnorePrevious = Boolean.FALSE;
+      logger.log(Level.INFO, "Overwriting previous outfiles");
+    }
+
+    // get list of files or file-parts in the specified directory, and subdirectories if recursive
+    mWorkList = new ArrayList<WorkItem>();
+    addFilesFromDir(inDirectory);
+    if (0 < mPreviouslyDone) {
+      logger.log(Level.INFO, "Preserving "+mPreviouslyDone+" output files in "+mOutputdirectory);
+    }
+    logger.log(Level.INFO, "Processing "+mWorkList.size()+" output files in "+mOutputdirectory);
+  }
+  
+  /**
+   * This method adds files or file-chunks in the input directory,
+   * if the respective output file does not exist,
+   * or if mIgnorePrevious = true.
+   * 
+   * @param dir
+   */
+  private void addFilesFromDir(File dir) {
+    File[] files = dir.listFiles();
+    for (int i = 0; i < files.length; i++) {
+      if (!files[i].isDirectory()) {
+        String outfilename = files[i].getAbsolutePath();
+        outfilename = outfilename.substring(mInputdirectory.length());
+        outfilename = mOutputdirectory+outfilename;
+        if (mBlocksize == 0) {
+          File outFile = new File(outfilename+".processed");
+          if (!mIgnorePrevious && outFile.exists()) {
+        	  mPreviouslyDone++;
+          }
+          if (mIgnorePrevious || !outFile.exists()) {
+            mWorkList.add(new WorkItem(files[i].getAbsolutePath(),outfilename+".processed",0,files[i].length(),0,false));
+            logger.log(Level.FINE, "adding "+outfilename);
+          }
+        }
+        // use blocksize
+        else {
+          long fsize = files[i].length();
+          long offset=0;
+          int j=0;
+          while (fsize > 0) {
+            String outfilechunk = outfilename+"_"+j;
+            long length = (fsize < mBlocksize) ? fsize : mBlocksize;
+            File outFile = new File(outfilechunk+".processed");
+            if (!mIgnorePrevious && outFile.exists()) {
+          	  mPreviouslyDone++;
+            }
+            if (mIgnorePrevious || !outFile.exists()) {
+              mWorkList.add(new WorkItem(files[i].getAbsolutePath(),outfilechunk+".processed",j,length,offset,fsize==length));
+              logger.log(Level.FINE, "adding "+outfilechunk);
+            }
+            j++;
+            fsize -= length;
+            offset += length;
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * @see org.apache.uima.collection.CollectionReader#hasNext()
+   */
+  public boolean hasNext() {
+    return mCurrentIndex < mWorkList.size();
+  }
+
+  /**
+   * @see org.apache.uima.collection.CollectionReader#getNext(org.apache.uima.cas.CAS)
+   */
+  public void getNext(CAS aCAS) throws IOException, CollectionException {
+    JCas jcas;
+    try {
+      jcas = aCAS.getJCas();
+      Workitem wi = new Workitem(jcas);
+      wi.setInputspec(mWorkList.get(mCurrentIndex).filename);
+      wi.setOutputspec(mWorkList.get(mCurrentIndex).outname);
+      wi.setBlockindex(mWorkList.get(mCurrentIndex).index);
+      wi.setBlocksize(mBlocksize);
+      wi.setBytelength(mWorkList.get(mCurrentIndex).length);
+      if (null != mEncoding) {
+    	  wi.setEncoding(mEncoding);
+      }
+      if (null != mLanguage) {
+    	  wi.setLanguage(mLanguage);
+      }
+      wi.setSendToLast(mSendToLast);
+      wi.addToIndexes();
+      wi.setLastBlock(mWorkList.get(mCurrentIndex).last);
+      logger.log(Level.INFO, "Sending "+wi.getInputspec()+" index="+wi.getBlockindex()+" last="+wi.getLastBlock()+" length="+wi.getBytelength());
+      mCurrentIndex++;
+      jcas.setDocumentText(wi.getInputspec()+" index="+wi.getBlockindex()+" length="+wi.getBytelength());
+    } catch (CASException e) {
+      throw new CollectionException(e);
+    }
+
+    //create WorkItem info structure
+  }
+
+  /**
+   * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#close()
+   */
+  public void close() throws IOException {
+  }
+
+  /**
+   * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#getProgress()
+   */
+  public Progress[] getProgress() {
+    return new Progress[] { new ProgressImpl(mCurrentIndex, mWorkList.size(), Progress.ENTITIES) };
+  }
+
+  /**
+   * Gets the total number of documents that will be returned by this collection reader. This is not
+   * part of the general collection reader interface.
+   * 
+   * @return the number of documents in the collection
+   */
+  public int getNumberOfDocuments() {
+    return mWorkList.size();
+  }
+
+}

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCC.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCC.java?rev=1510744&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCC.java (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCC.java Mon Aug  5 21:34:40 2013
@@ -0,0 +1,122 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ducc.sampleapps;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.impl.XmiCasSerializer;
+import org.apache.uima.ducc.Workitem;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.XMLSerializer;
+import org.xml.sax.SAXException;
+
+/**
+ * Writes the documents of a work item into a zip file, one XMI-serialized entry per document.
+ * A CAS holding a {@link DuccDocumentInfo} is appended as entry <code>doc_&lt;docseq&gt;</code>
+ * to a temporary file named <code>&lt;outputfile&gt;_temp</code>. A CAS holding a
+ * {@link Workitem} closes that file and renames it to its final name.
+ */
+public class DuccTextCC extends JCasAnnotator_ImplBase {
+  private Logger logger;
+  /** Final name of the output currently being written, or null when none is open. */
+  private String outputFilename=null;
+  /** Temporary file that receives the zip data until it is flushed. */
+  private File outFile;
+  private FileOutputStream fos;
+  private ZipOutputStream zos;
+
+
+  public void initialize(UimaContext aContext) throws ResourceInitializationException {
+    super.initialize(aContext);
+    logger = aContext.getLogger();
+  }
+
+  /**
+   * Flushes the current output if the CAS contains a Workitem; otherwise appends the CAS to
+   * the output named by its DuccDocumentInfo, opening a new output file when the name changes.
+   *
+   * @throws AnalysisEngineProcessException if the CAS has neither a Workitem nor a
+   *           DuccDocumentInfo, if the DuccDocumentInfo names no output file, if a flush is
+   *           requested for an output other than the one being written, or if any file or
+   *           serialization operation fails
+   */
+  public void process(JCas jcas) throws AnalysisEngineProcessException {
+    Iterator<FeatureStructure> fsit = jcas.getIndexRepository().getAllIndexedFS(jcas.getCasType(Workitem.type));
+    if (fsit.hasNext()) {
+      flush((Workitem) fsit.next());
+      return;
+    }
+
+    fsit = jcas.getIndexRepository().getAllIndexedFS(jcas.getCasType(DuccDocumentInfo.type));
+    if (!fsit.hasNext()) {
+      throw new AnalysisEngineProcessException(new RuntimeException("No DuccDocumentInfo FS in CAS"));
+    }
+    DuccDocumentInfo di = (DuccDocumentInfo) fsit.next();
+    String outputfile = di.getOutputfile();
+    if (outputfile == null) {
+      throw new AnalysisEngineProcessException(new RuntimeException("DuccDocumentInfo has no outputfile"));
+    }
+    if (!outputfile.equals(outputFilename)) {
+      openOutput(outputfile);
+    }
+    writeDocument(jcas, di.getDocseq());
+  }
+
+  /**
+   * Closes the current zip file and renames the temporary file to its final name, then clears
+   * the output state so that a later document opens a fresh file instead of writing to the
+   * closed stream.
+   */
+  private void flush(Workitem wi) throws AnalysisEngineProcessException {
+    // outputFilename is null when no document has been written since the last flush
+    if (outputFilename == null || !outputFilename.equals(wi.getOutputspec())) {
+      throw new AnalysisEngineProcessException(new RuntimeException("flush mismatch: "+outputFilename+" != "+wi.getOutputspec()));
+    }
+    try {
+      zos.close();
+      fos.close();
+      if (!outFile.renameTo(new File(outputFilename))) {
+        throw new IOException("Rename failed for "+outputFilename);
+      }
+    } catch (IOException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+    logger.log(Level.INFO, "DuccDummyCC: Flushed "+wi.getOutputspec());
+    outputFilename = null;
+    outFile = null;
+    zos = null;
+    fos = null;
+  }
+
+  /**
+   * Opens <code>outputfile + "_temp"</code> for zip output, creating parent directories as
+   * needed. The current output name is recorded only after the file has been opened, so a
+   * failure here cannot leave the name pointing at a stream that was never created.
+   * NOTE(review): a previously opened stream that was never flushed is not closed here.
+   */
+  private void openOutput(String outputfile) throws AnalysisEngineProcessException {
+    File tempFile = new File(outputfile+"_temp");
+    File outDir = tempFile.getParentFile();
+    if (outDir != null && !outDir.exists()) {
+      outDir.mkdirs();
+    }
+    try {
+      fos = new FileOutputStream(tempFile);
+      zos = new ZipOutputStream(fos);
+      zos.setLevel(7); //TODO turn off compression for binary
+    } catch (IOException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+    outFile = tempFile;
+    outputFilename = outputfile;
+  }
+
+  /** Serializes the CAS as XMI into a new zip entry named <code>doc_&lt;docseq&gt;</code>. */
+  private void writeDocument(JCas jcas, int docseq) throws AnalysisEngineProcessException {
+    ZipEntry ze = new ZipEntry("doc_"+docseq);
+    ze.setMethod(ZipEntry.DEFLATED);
+    try {
+      zos.putNextEntry(ze);
+      // write XMI
+      XmiCasSerializer ser = new XmiCasSerializer(jcas.getTypeSystem());
+      XMLSerializer xmlSer = new XMLSerializer(zos, false);
+      ser.serialize(jcas.getCas(), xmlSer.getContentHandler());
+      zos.closeEntry();
+    } catch (IOException e) {
+      throw new AnalysisEngineProcessException(e);
+    } catch (SAXException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+  }
+
+}

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java?rev=1510744&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java Mon Aug  5 21:34:40 2013
@@ -0,0 +1,300 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ducc.sampleapps;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.channels.FileChannel;
+import java.util.Arrays;
+import java.util.Iterator;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasMultiplier_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.ducc.Workitem;
+import org.apache.uima.ducc.sampleapps.DuccDocumentInfo;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+
+public class DuccTextCM extends JCasMultiplier_ImplBase {
+  private byte[] buffer = null;
+  private int buffsize;
+  private FileInputStream fis;
+  private String inputFileName;
+  private String outputFileName;
+  private String language;
+  private String encoding;
+  private String nextDoc;
+  private int nextDocOffset;
+  private int bytelength;
+  private int blockindex;
+  private boolean newWI;
+  private boolean spilled;
+  private boolean firstdoc;
+  private boolean lastblock;
+  private int docInWI;
+  private long filesize;
+  private Workitem wi;
+  private int currentindex;
+  private Logger logger;
+  FileChannel fc;
+
+  private enum NextDoc { FIRSTDOC, SEP_IN_LASTBLOCK, NORMAL };
+  private NextDoc strategy;
+  
+  private final int DEFAULT_BUFFER_SIZE = 20000000;
+
+  public boolean hasNext() throws AnalysisEngineProcessException {
+	if (spilled) {
+	  return false;
+	}
+	try {
+      return findnextdoc(strategy);
+	} catch (IOException e) {
+	  throw new AnalysisEngineProcessException(e);
+	}
+  }
+
+  public AbstractCas next() throws AnalysisEngineProcessException {
+    JCas newcas = getEmptyJCas();
+    newcas.setDocumentText(getNextDocument());
+    newcas.setDocumentLanguage(language);
+    DuccDocumentInfo di = new DuccDocumentInfo(newcas);
+    di.setInputfile(inputFileName);
+    di.setOutputfile(outputFileName);
+    di.setDocseq(docInWI++);
+    di.setByteoffset(wi.getBlockindex() * wi.getBlocksize() + nextDocOffset);
+    di.addToIndexes();
+    return newcas;
+  }
+
+  @Override
+  public void process(JCas jcas) throws AnalysisEngineProcessException {
+    Iterator<FeatureStructure> fsit = jcas.getIndexRepository().getAllIndexedFS(jcas.getCasType(Workitem.type));
+    if (!fsit.hasNext()) {
+      throw new AnalysisEngineProcessException(new RuntimeException("No workitem FS in CAS"));
+    }
+    wi = (Workitem) fsit.next();
+    logger.log(Level.INFO, "DuccTextCM: "+wi.getInputspec()+" at block "+wi.getBlockindex()+" length "+wi.getBytelength()+
+    		" offset "+wi.getBlockindex() * wi.getBlocksize()+" outputs "+wi.getOutputspec());
+    try {
+      openInputFile(wi);
+    } catch (IOException e) {
+      throw new AnalysisEngineProcessException(e);
+    }
+
+    if (buffer == null) {
+      if (wi.getBlocksize()>0) {
+    	buffer = new byte[wi.getBlocksize() * 2];
+    	buffsize = wi.getBlocksize() * 2;
+      }
+      else {
+    	buffer = new byte[DEFAULT_BUFFER_SIZE];
+    	buffsize = DEFAULT_BUFFER_SIZE;
+      }
+    }
+    else {
+      if (wi.getBytelength() > buffsize) {
+    	buffer = new byte[wi.getBytelength() * 2];
+        buffsize = wi.getBytelength();
+      }
+    }
+
+    spilled = false;
+    docInWI = 0;
+    strategy = (blockindex == 0) ? NextDoc.FIRSTDOC : NextDoc.NORMAL;
+  }
+
+
+  public void initialize(UimaContext aContext) throws ResourceInitializationException {
+    super.initialize(aContext);
+    logger = aContext.getLogger();
+  }
+
+
+  private void openInputFile(Workitem wi) throws IOException {
+    inputFileName = wi.getInputspec();
+    outputFileName = wi.getOutputspec();
+    bytelength = wi.getBytelength();
+    blockindex = wi.getBlockindex();
+    lastblock = wi.getLastBlock();
+    language = wi.getLanguage();
+    fis = new FileInputStream(new File(inputFileName));
+    encoding = (null==wi.getEncoding()) ? "UTF-8" : wi.getEncoding();
+    fc = fis.getChannel();
+    long start = wi.getBlockindex() * wi.getBlocksize();
+    filesize = fc.size();
+    if (start > filesize) {
+      throw new IOException("Specifid start position beyond end of input file "+inputFileName);
+    }
+    fis.skip(start);
+	newWI = true;
+  }
+
+  private boolean findnextdoc(NextDoc condition) throws IOException {
+    int startloc=-1;
+
+    if (newWI) {
+      newWI = false;
+      int len = fis.read(buffer,0,bytelength);
+      if (len != bytelength) {
+    	throw new IOException("Read "+len+" bytes, expected "+bytelength);
+      }
+   	  currentindex = 0;
+    }
+
+    if (condition.equals(NextDoc.SEP_IN_LASTBLOCK)) {
+    	// separator found at end of last block
+    	if (10 == buffer[currentindex] && 10 == buffer[currentindex+1]) {
+      	  return false;
+      	}
+      	if (10 == buffer[currentindex]) {
+      	  currentindex++; // point at first char in Doc
+      	}
+      	startloc=currentindex;
+
+        // find end of next doc
+        int endloc=0;
+        while (currentindex < (bytelength-1)) {
+          if (10 == buffer[currentindex] && 10 == buffer[currentindex+1]) {
+        	endloc = currentindex - 1;
+        	break;
+          }
+          else {
+        	currentindex++;
+          }
+        }
+        if (endloc == 0) {
+          throw new RuntimeException("Document larger than "+bytelength+" found in "+inputFileName+" block "+blockindex);
+        }
+        byte [] docbytes = Arrays.copyOfRange(buffer, startloc, endloc);
+        nextDoc = new String(docbytes, encoding);
+        nextDocOffset = startloc;
+        return true;
+      }
+
+    if (condition.equals(NextDoc.FIRSTDOC)) {
+      // special handling at beginning of first block
+      // skip any leading EOL to find start of first doc
+      // only execute this once
+      strategy = NextDoc.NORMAL;
+      while (10 == buffer[currentindex]) {
+    	currentindex++;
+    	if (currentindex == bytelength) {
+    	  if (firstdoc) {
+    	    throw new RuntimeException("All newlines found in "+inputFileName+" block "+blockindex);
+    	  }
+    	}
+      }
+    }
+
+    if (condition.equals(NextDoc.NORMAL)) {
+    	// currentindex either pointing at start of a segmentation, or 
+    	// if a new block then possibly the middle of a previous document
+      if (!(10 == buffer[currentindex] && 10 == buffer[currentindex+1])) {
+      	// in the middle of a spilled Doc. Find next segmentation
+      	while (currentindex < (bytelength-1)) {
+      	  if (10 == buffer[currentindex] && 10 == buffer[currentindex+1]) {
+      		break;
+      	  }
+      	  else {
+      		currentindex++;
+      	  }
+      	}
+      }
+      if ( currentindex == bytelength-1) {
+    	fis.close();
+    	return false;
+      }
+      // now pointing at start of a segmentation, find start/end of next Doc
+      while (10 == buffer[currentindex]) {
+    	currentindex++;
+    	if (currentindex == bytelength) {
+    	  if (lastblock) {
+    		fis.close();
+    		return false;
+    	  }
+          // read next block and continue looking for end of Doc
+    	  int len = fis.read(buffer,bytelength,bytelength);
+    	  if (len <= 0) {
+            throw new IOException("Read "+len+" bytes for "+inputFileName+" block "+blockindex+1);
+    	  }
+    	  fis.close();
+    	  spilled = true;
+    	  bytelength += len;
+    	  return findnextdoc(NextDoc.SEP_IN_LASTBLOCK);
+    	}
+      }
+    }
+
+    startloc = currentindex;
+    // find end of Doc
+    int endloc=0;
+    while (currentindex < (bytelength-1)) {
+      if (10 == buffer[currentindex] && 10 == buffer[currentindex+1]) {
+    	endloc = currentindex - 1;
+      	break;
+      }
+      else {
+    	currentindex++;
+      }
+    }
+
+      if (endloc == 0) {
+    	if (lastblock) {
+    	  endloc = bytelength-1;
+    	}
+    	else {
+    	  // read next block and continue looking for end of Doc
+          int len = fis.read(buffer,bytelength,bytelength);
+          if (len <= 0) {
+        	throw new IOException("Read "+len+" bytes for "+inputFileName+" block "+blockindex+1);
+          }
+          fis.close();
+          spilled = true;
+          bytelength += len;
+    	}
+        while (currentindex < (bytelength-1)) {
+          if (10 == buffer[currentindex] && 10 == buffer[currentindex+1]) {
+        	endloc = currentindex - 1;
+          	break;
+          }
+          else {
+          	currentindex++;
+          }
+        }
+        endloc = currentindex - 1;
+      }
+      byte [] docbytes = Arrays.copyOfRange(buffer, startloc, endloc);
+      nextDoc = new String(docbytes, encoding);
+      nextDocOffset = startloc;
+      return true;
+  }
+
+  private String getNextDocument() {
+    return nextDoc;
+  }
+
+}

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccDocumentInfoTS.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccDocumentInfoTS.xml?rev=1510744&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccDocumentInfoTS.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccDocumentInfoTS.xml Mon Aug  5 21:34:40 2013
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?><typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <name>Ducc Document Info</name>
+  <description>Type for communication between CR, CM, FC and CC</description>
+  <version>1.0</version>
+  <vendor>Apache UIMA</vendor>
+  <types>
+    <typeDescription>
+      <name>org.apache.uima.ducc.sampleapps.DuccDocumentInfo</name>
+      <description/>
+      <supertypeName>uima.cas.TOP</supertypeName>
+      <features>
+        <featureDescription>
+          <name>inputfile</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+        <featureDescription>
+          <name>outputfile</name>
+          <description/>
+          <rangeTypeName>uima.cas.String</rangeTypeName>
+        </featureDescription>
+        <featureDescription>
+          <name>docseq</name>
+          <description>document sequence within work item</description>
+          <rangeTypeName>uima.cas.Integer</rangeTypeName>
+        </featureDescription>
+        <featureDescription>
+          <name>byteoffset</name>
+          <description>offset of byte location of first character in document</description>
+          <rangeTypeName>uima.cas.Integer</rangeTypeName>
+        </featureDescription>
+      </features>
+    </typeDescription>
+  </types>
+</typeSystemDescription>
\ No newline at end of file

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobTextCR.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobTextCR.xml?rev=1510744&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobTextCR.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobTextCR.xml Mon Aug  5 21:34:40 2013
@@ -0,0 +1,117 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+	<!--
+	 ***************************************************************
+	 * Licensed to the Apache Software Foundation (ASF) under one
+	 * or more contributor license agreements.  See the NOTICE file
+	 * distributed with this work for additional information
+	 * regarding copyright ownership.  The ASF licenses this file
+	 * to you under the Apache License, Version 2.0 (the
+	 * "License"); you may not use this file except in compliance
+	 * with the License.  You may obtain a copy of the License at
+         *
+	 *   http://www.apache.org/licenses/LICENSE-2.0
+	 * 
+	 * Unless required by applicable law or agreed to in writing,
+	 * software distributed under the License is distributed on an
+	 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+	 * KIND, either express or implied.  See the License for the
+	 * specific language governing permissions and limitations
+	 * under the License.
+	 ***************************************************************
+   -->
+   
+<collectionReaderDescription  xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <implementationName>org.apache.uima.ducc.sampleapps.DuccJobTextCR</implementationName>
+    <processingResourceMetaData>
+        <name>DuccJobTextCR</name>
+        <description>Generates CASes with reference to input.</description>
+        <version>1.0</version>
+        <vendor>Apache UIMA</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>InputDirectory</name>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>OutputDirectory</name>
+        <description>The base output directory</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>IgnorePreviousOutput</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Encoding</name>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Language</name>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>BlockSize</name>
+        <type>Integer</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>SendToLast</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>Debug</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>InputDirectory</name>
+        <value>
+          <string>/tmp</string>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>OutputDirectory</name>
+        <value>
+          <string>/tmp</string>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.ducc.common.uima.DuccJobFlowControlTS"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+</collectionReaderDescription>

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccTextCC.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccTextCC.xml?rev=1510744&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccTextCC.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccTextCC.xml Mon Aug  5 21:34:40 2013
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+	<!--
+	 ***************************************************************
+	 * Licensed to the Apache Software Foundation (ASF) under one
+	 * or more contributor license agreements.  See the NOTICE file
+	 * distributed with this work for additional information
+	 * regarding copyright ownership.  The ASF licenses this file
+	 * to you under the Apache License, Version 2.0 (the
+	 * "License"); you may not use this file except in compliance
+	 * with the License.  You may obtain a copy of the License at
+         *
+	 *   http://www.apache.org/licenses/LICENSE-2.0
+	 * 
+	 * Unless required by applicable law or agreed to in writing,
+	 * software distributed under the License is distributed on an
+	 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+	 * KIND, either express or implied.  See the License for the
+	 * specific language governing permissions and limitations
+	 * under the License.
+	 ***************************************************************
+   -->
+   
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.uima.ducc.sampleapps.DuccTextCC</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>DuccTextCC</name>
+    <description/>
+    <version>1.0</version>
+    <vendor/>
+    <configurationParameters/>
+    <configurationParameterSettings/>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.ducc.common.uima.DuccJobFlowControlTS"/>
+        <import name="org.apache.uima.ducc.sampleapps.DuccDocumentInfoTS"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>
+   
\ No newline at end of file

Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccTextCM.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccTextCM.xml?rev=1510744&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccTextCM.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccTextCM.xml Mon Aug  5 21:34:40 2013
@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+	<!--
+	 ***************************************************************
+	 * Licensed to the Apache Software Foundation (ASF) under one
+	 * or more contributor license agreements.  See the NOTICE file
+	 * distributed with this work for additional information
+	 * regarding copyright ownership.  The ASF licenses this file
+	 * to you under the Apache License, Version 2.0 (the
+	 * "License"); you may not use this file except in compliance
+	 * with the License.  You may obtain a copy of the License at
+         *
+	 *   http://www.apache.org/licenses/LICENSE-2.0
+	 * 
+	 * Unless required by applicable law or agreed to in writing,
+	 * software distributed under the License is distributed on an
+	 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+	 * KIND, either express or implied.  See the License for the
+	 * specific language governing permissions and limitations
+	 * under the License.
+	 ***************************************************************
+   -->
+   
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>org.apache.uima.ducc.sampleapps.DuccTextCM</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>DuccTextCM</name>
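+    <description>Reads the block of the input text file identified by the incoming Workitem and outputs one CAS for each document found, where documents are separated by blank lines.</description>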
+    <version>1.0</version>
+    <vendor/>
+    <configurationParameters/>
+    <configurationParameterSettings/>
+    <typeSystemDescription>
+      <imports>
+        <import name="org.apache.uima.ducc.common.uima.DuccJobFlowControlTS"/>
+        <import name="org.apache.uima.ducc.sampleapps.DuccDocumentInfoTS"/>
+      </imports>
+    </typeSystemDescription>
+    <typePriorities/>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+  <resourceManagerConfiguration/>
+</analysisEngineDescription>