You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ea...@apache.org on 2013/09/30 15:33:33 UTC
svn commit: r1527553 - in
/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main:
java/org/apache/uima/ducc/sampleapps/
resources/org/apache/uima/ducc/sampleapps/
Author: eae
Date: Mon Sep 30 13:33:32 2013
New Revision: 1527553
URL: http://svn.apache.org/r1527553
Log:
UIMA-3211
Added:
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCC.xml
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCM.xml
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobCasCR.xml
Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java?rev=1527553&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java Mon Sep 30 13:33:32 2013
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ducc.sampleapps;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.impl.Serialization;
+import org.apache.uima.cas.impl.XmiCasSerializer;
+import org.apache.uima.ducc.Workitem;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.TypeSystemUtil;
+import org.apache.uima.util.XMLSerializer;
+
+public class DuccCasCC extends JCasAnnotator_ImplBase {
+
+ public static final String PARAM_XMICOMPRESSIONLEVEL = "XmiCompressionLevel";
+ public static final String PARAM_USEBINARYSERIALIZATION = "UseBinarySerialization";
+
+ private Logger logger;
+ private String outputFilename=null;
+ private File outFile;
+ private FileOutputStream fos;
+ private ZipOutputStream zos;
+ private boolean useBinaryCas;
+ private int zipCompLevel;
+ private String casExt;
+
+
+ public void initialize(UimaContext aContext) throws ResourceInitializationException {
+ super.initialize(aContext);
+ zipCompLevel = (Integer)getContext().getConfigParameterValue(PARAM_XMICOMPRESSIONLEVEL);
+ useBinaryCas = (null == getContext().getConfigParameterValue(PARAM_USEBINARYSERIALIZATION)) ? Boolean.FALSE :
+ (Boolean) getContext().getConfigParameterValue(PARAM_USEBINARYSERIALIZATION);
+ logger = aContext.getLogger();
+ if (useBinaryCas) {
+ zipCompLevel = 0;
+ casExt = "cas";
+ logger.log(Level.INFO, "Outputting CASes in UIMA compressed binary form 6");
+ }
+ else {
+ casExt = "xmi";
+ logger.log(Level.INFO, "Outputting CASes in XmiCas format, zip compressed at level="+zipCompLevel);
+ }
+ }
+
+ /**
+  * Handles one CAS. A CAS carrying a Workitem finalizes the current output
+  * zip file (close, then rename from the "_temp" name to the final name).
+  * Any other CAS must carry a DuccDocumentInfo and is serialized as one
+  * entry of the output zip file named by that DuccDocumentInfo.
+  *
+  * @param jcas the CAS to flush on or to serialize
+  * @throws AnalysisEngineProcessException if the CAS has neither a Workitem
+  *         nor a DuccDocumentInfo, if a flush does not match the open output
+  *         file, or if any I/O or serialization step fails
+  */
+ public void process(JCas jcas) throws AnalysisEngineProcessException {
+   Iterator<FeatureStructure> fsit = jcas.getIndexRepository().getAllIndexedFS(jcas.getCasType(Workitem.type));
+   if (fsit.hasNext()) {
+     flushOutput((Workitem) fsit.next());
+     return;
+   }
+
+   fsit = jcas.getIndexRepository().getAllIndexedFS(jcas.getCasType(DuccDocumentInfo.type));
+   if (!fsit.hasNext()) {
+     throw new AnalysisEngineProcessException(new RuntimeException("No DuccDocumentInfo FS in CAS"));
+   }
+   DuccDocumentInfo di = (DuccDocumentInfo) fsit.next();
+   String outputfile = di.getOutputfile();
+   if (outputfile == null) {
+     throw new AnalysisEngineProcessException(new RuntimeException("DuccDocumentInfo has no output file set"));
+   }
+   if (!outputfile.equals(outputFilename)) {
+     openOutput(jcas, outputfile);
+   }
+
+   ZipEntry ze = new ZipEntry("doc_"+di.getDocseq()+"."+casExt);
+   ze.setMethod(ZipEntry.DEFLATED);
+   try {
+     zos.putNextEntry(ze);
+     BufferedOutputStream bos = new BufferedOutputStream(zos,1024*10);
+     if (useBinaryCas) {
+       Serialization.serializeWithCompression(jcas.getCas(), bos, jcas.getTypeSystem());
+     }
+     else {
+       // write XMI
+       XmiCasSerializer ser = new XmiCasSerializer(jcas.getTypeSystem());
+       XMLSerializer xmlSer = new XMLSerializer(bos, false);
+       ser.serialize(jcas.getCas(), xmlSer.getContentHandler());
+     }
+     // bos is deliberately not closed: closing it would close the zip stream
+     bos.flush();
+     zos.closeEntry();
+   } catch (Exception e) {
+     throw new AnalysisEngineProcessException(e);
+   }
+ }
+
+ /**
+  * Closes the open output zip file and renames it to its final name.
+  * The output state is cleared afterwards, so that a later document for the
+  * same name opens a fresh file instead of writing to a closed stream.
+  */
+ private void flushOutput(Workitem wi) throws AnalysisEngineProcessException {
+   String requested = wi.getOutputspec();
+   if (outputFilename == null || zos == null) {
+     // previously this case surfaced as a NullPointerException
+     throw new AnalysisEngineProcessException(new RuntimeException(
+         "flush requested for "+requested+" but no output file is open"));
+   }
+   if (!outputFilename.equals(requested)) {
+     throw new AnalysisEngineProcessException(new RuntimeException("flush mismatch: "+outputFilename+" != "+requested));
+   }
+   try {
+     zos.close();
+     fos.close();
+     if (!outFile.renameTo(new File(outputFilename))) {
+       throw new IOException("Rename failed for "+outputFilename);
+     }
+   } catch (IOException e) {
+     throw new AnalysisEngineProcessException(e);
+   } finally {
+     outputFilename = null;
+     outFile = null;
+     zos = null;
+     fos = null;
+   }
+   logger.log(Level.INFO, "DuccCasCC: Flushed "+requested);
+ }
+
+ /**
+  * Opens the temporary zip file for the given output name and, for binary
+  * output, stores the CAS type system as the first entry.
+  */
+ private void openOutput(JCas jcas, String outputfile) throws AnalysisEngineProcessException {
+   if (zos != null) {
+     // The previous file never received its flush; release the descriptor
+     // but leave its "_temp" file in place since it was not finalized.
+     logger.log(Level.WARNING, "DuccCasCC: "+outputFilename+" was not flushed before switching to "+outputfile);
+     try {
+       zos.close();
+     } catch (IOException e) {
+       logger.log(Level.WARNING, "DuccCasCC: failed closing unflushed output "+outputFilename+": "+e);
+     }
+     zos = null;
+     fos = null;
+   }
+   // forget the old name until the new file is really open
+   outputFilename = null;
+   try {
+     outFile = new File(outputfile+"_temp");
+     File outDir = outFile.getParentFile();
+     // isDirectory() re-check tolerates another process creating the directory concurrently
+     if (outDir != null && !outDir.exists() && !outDir.mkdirs() && !outDir.isDirectory()) {
+       throw new IOException("Cannot create output directory "+outDir);
+     }
+     fos = new FileOutputStream(outFile);
+     zos = new ZipOutputStream(new BufferedOutputStream(fos,1024*100));
+     if (useBinaryCas) {
+       //put the output CAS typesystem in the output zipfile
+       ZipEntry ze = new ZipEntry("typesystem.xml");
+       ze.setMethod(ZipEntry.DEFLATED);
+       zos.setLevel(9);
+       zos.putNextEntry(ze);
+       TypeSystem ts = jcas.getTypeSystem();
+       TypeSystemDescription tsDesc = TypeSystemUtil.typeSystem2TypeSystemDescription(ts);
+       tsDesc.toXML(zos); // Capture type system in XML format
+       zos.closeEntry();
+     }
+     zos.setLevel(zipCompLevel);
+     outputFilename = outputfile;
+   } catch (Exception e) {
+     throw new AnalysisEngineProcessException(e);
+   }
+ }
+
+}
Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java?rev=1527553&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java Mon Sep 30 13:33:32 2013
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ducc.sampleapps;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasMultiplier_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.impl.Serialization;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
+import org.apache.uima.ducc.Workitem;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.apache.uima.util.CasCreationUtils;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.XMLInputSource;
+import org.apache.uima.util.XMLParser;
+
+public class DuccCasCM extends JCasMultiplier_ImplBase {
+ private String inputFileName;
+ private String outputFileName;
+ private FileInputStream fis;
+ private ZipInputStream zis;
+ private ZipEntry nextEntry;
+ private Workitem wi;
+ private int docInWI;
+ private boolean readingXmiFormat;
+ private TypeSystem inputTS;
+ private Logger logger;
+
+ /**
+  * Advances the input zip stream to its next entry and reports whether one
+  * exists. When the archive is exhausted the input stream is closed so the
+  * file handle is not held until the next work item arrives.
+  *
+  * @return true if another zip entry is pending; false if the archive is
+  *         exhausted or no input file is currently open
+  * @throws AnalysisEngineProcessException if reading the zip stream fails
+  */
+ public boolean hasNext() throws AnalysisEngineProcessException {
+   if (zis == null) {
+     // nothing open: either no work item processed yet, or already exhausted
+     nextEntry = null;
+     return false;
+   }
+   try {
+     nextEntry = zis.getNextEntry();
+     if (nextEntry == null) {
+       // closing the zip stream also closes the underlying file stream
+       zis.close();
+       zis = null;
+       fis = null;
+     }
+   } catch (IOException e) {
+     throw new AnalysisEngineProcessException(e);
+   }
+   return nextEntry != null;
+ }
+
+ /**
+  * Deserializes the pending zip entry into a new CAS and tags it with a
+  * DuccDocumentInfo holding the input file, output file and sequence number.
+  * If the first entry of the archive is "typesystem.xml" the archive is read
+  * as binary CASes using that type system; otherwise entries are read as XMI.
+  *
+  * @return the newly populated CAS
+  * @throws AnalysisEngineProcessException if no entry is pending, if a
+  *         typesystem.xml entry is misplaced or is the only entry, or if
+  *         reading or deserializing the entry fails
+  */
+ public AbstractCas next() throws AnalysisEngineProcessException {
+   if (nextEntry == null) {
+     throw new AnalysisEngineProcessException(new RuntimeException(
+         "next() called with no pending entry in input zipfile "+inputFileName));
+   }
+   if (0 == docInWI) {
+     if (nextEntry.getName().equals("typesystem.xml")) {
+       // getTypesystem() also advances nextEntry to the first CAS entry
+       getTypesystem();
+       readingXmiFormat = false;
+       if (nextEntry == null) {
+         throw new AnalysisEngineProcessException(new RuntimeException(
+             "no CAS entries after typesystem.xml in input zipfile "+inputFileName));
+       }
+     }
+     else {
+       readingXmiFormat = true;
+     }
+   }
+   else {
+     if (nextEntry.getName().equals("typesystem.xml")) {
+       throw new AnalysisEngineProcessException(new RuntimeException(
+           "typesystem.xml entry found in the middle of input zipfile "+inputFileName));
+     }
+   }
+   // Acquire the CAS only after validation; it must be released if it is not returned.
+   JCas newcas = getEmptyJCas();
+   boolean delivered = false;
+   try {
+     byte[] buff = new byte[10000];
+     int bytesread;
+     ByteArrayOutputStream baos = new ByteArrayOutputStream();
+     try {
+       while (-1 != (bytesread = zis.read(buff))) {
+         baos.write(buff,0,bytesread);
+       }
+       ByteArrayInputStream bis = new ByteArrayInputStream(baos.toByteArray());
+       if (readingXmiFormat) {
+         XmiCasDeserializer.deserialize(bis, newcas.getCas());
+       }
+       else {
+         Serialization.deserializeCAS(newcas.getCas(), bis, inputTS, null);
+       }
+     } catch (Exception e) {
+       throw new AnalysisEngineProcessException(e);
+     }
+     Iterator<FeatureStructure> fsit = newcas.getIndexRepository().getAllIndexedFS(newcas.getCasType(DuccDocumentInfo.type));
+     DuccDocumentInfo di;
+     if (fsit.hasNext()) {
+       di = (DuccDocumentInfo) fsit.next();
+     }
+     else {
+       di = new DuccDocumentInfo(newcas);
+       di.addToIndexes();
+     }
+     di.setInputfile(inputFileName);
+     di.setOutputfile(outputFileName);
+     di.setDocseq(docInWI++);
+     delivered = true;
+     return newcas;
+   } finally {
+     if (!delivered) {
+       // give the CAS back so a failed document does not drain the CAS pool
+       newcas.release();
+     }
+   }
+ }
+
+ /**
+  * Accepts a work item CAS and opens the input zip file it names, so that
+  * subsequent hasNext()/next() calls can iterate over its entries.
+  *
+  * @param jcas CAS expected to contain a Workitem feature structure
+  * @throws AnalysisEngineProcessException if the CAS has no Workitem or the
+  *         input file cannot be opened
+  */
+ @Override
+ public void process(JCas jcas) throws AnalysisEngineProcessException {
+   final Iterator<FeatureStructure> workitems =
+       jcas.getIndexRepository().getAllIndexedFS(jcas.getCasType(Workitem.type));
+   if (workitems.hasNext()) {
+     wi = (Workitem) workitems.next();
+     logger.log(Level.INFO, "DuccCasCM: inputs "+wi.getInputspec()+" outputs "+wi.getOutputspec());
+     try {
+       openInputFile(wi);
+     } catch (IOException ioe) {
+       throw new AnalysisEngineProcessException(ioe);
+     }
+     return;
+   }
+   throw new AnalysisEngineProcessException(new RuntimeException("No workitem FS in CAS"));
+ }
+
+
+ /**
+  * Initializes the superclass and keeps the context's logger for later use.
+  *
+  * @param aContext the UIMA context supplying the logger
+  * @throws ResourceInitializationException if the superclass initialization fails
+  */
+ public void initialize(UimaContext aContext) throws ResourceInitializationException {
+   super.initialize(aContext);
+   this.logger = aContext.getLogger();
+ }
+
+
+ /**
+  * Records the input and output names of the work item and opens the input
+  * zip file for reading, resetting the per-work-item document counter.
+  * A stream still open from a previous work item is closed first so its
+  * file handle is not leaked.
+  *
+  * @param wi the work item naming the input zip file and the output file
+  * @throws IOException if the input file cannot be opened
+  */
+ private void openInputFile(Workitem wi) throws IOException {
+   if (zis != null) {
+     // best effort: a failure closing the old input must not block the new one
+     try {
+       zis.close();
+     } catch (IOException e) {
+       logger.log(Level.WARNING, "DuccCasCM: failed closing previous input "+inputFileName+": "+e);
+     }
+     zis = null;
+     fis = null;
+   }
+   nextEntry = null;
+   inputFileName = wi.getInputspec();
+   outputFileName = wi.getOutputspec();
+   fis = new FileInputStream(new File(inputFileName));
+   zis = new ZipInputStream(new BufferedInputStream(fis,1024*100));
+   docInWI = 0;
+ }
+
+
+ private void getTypesystem() throws AnalysisEngineProcessException {
+ byte[] buff = new byte[10000];
+ int bytesread;
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ try {
+ while (-1 != (bytesread = zis.read(buff))) {
+ baos.write(buff,0,bytesread);
+ }
+ ByteArrayInputStream bis = new ByteArrayInputStream(baos.toByteArray());
+ // Get XML parser from framework
+ XMLParser xmlParser = UIMAFramework.getXMLParser();
+ // Parse type system descriptor
+ TypeSystemDescription tsDesc = xmlParser.parseTypeSystemDescription(new XMLInputSource((InputStream)bis,null));
+ // Use type system description to create CAS and get the type system object
+ inputTS = CasCreationUtils.createCas(tsDesc, null, null).getTypeSystem();
+ // advance to first input CAS
+ nextEntry = zis.getNextEntry();
+ } catch (Exception e) {
+ throw new AnalysisEngineProcessException(e);
+ }
+ }
+}
Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java?rev=1527553&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java Mon Sep 30 13:33:32 2013
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ducc.sampleapps;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASException;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.collection.CollectionReader_ImplBase;
+import org.apache.uima.ducc.Workitem;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Level;
+import org.apache.uima.util.Logger;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+
+/**
+ * A DUCC Job collection reader that creates one work item for each *.zip file found in an
+ * input directory in the filesystem.
+ * It can be configured with the following parameters:
+ * <ul>
+ * <li><code>InputSpec</code> - path to directory containing input *.zip files</li>
+ * <li><code>OutputDirectory</code> - path to directory for output files</li>
+ * <li><code>IgnorePreviousOutput</code> (optional) - flag to process input files even if their output file already exists</li>
+ * <li><code>SendToLast</code> (optional) - flag to route WorkItem CAS to last pipeline component. Only used for jobs with initial CM.</li>
+ * <li><code>SendToAll</code> (optional) - flag to route WorkItem CAS to all pipeline components. Only used for jobs with initial CM.</li>
+ * </ul>
+ *
+ */
+public class DuccJobCasCR extends CollectionReader_ImplBase {
+ /**
+ * Name of configuration parameter that must be set to the path of a directory containing input
+ * files.
+ */
+ public static final String PARAM_INPUTSPEC = "InputSpec";
+
+ /**
+ * Name of configuration parameter that must be set to the path of the base directory
+ * where output files will be created.
+ */
+ public static final String PARAM_OUTPUTDIR = "OutputDirectory";
+
+ /**
+ * Name of configuration parameter that indicates if previous output should be ignored.
+ */
+ public static final String PARAM_IGNOREPREVIOUS = "IgnorePreviousOutput";
+
+ /**
+ * Flag to route WorkItem CAS to last pipeline component. Used to flush any output data.
+ * This string is ignored if the Job has a CM component.
+ */
+ public static final String PARAM_SENDTOLAST = "SendToLast";
+
+ /**
+ * Flag to route WorkItem CAS to all pipeline components.
+ * If the Job has no CM component the WI CAS is already sent to AE and CC.
+ */
+ public static final String PARAM_SENDTOALL = "SendToAll";
+
+ /** Pairs the path of one input file with the path of the output file to produce from it. */
+ class WorkItem {
+   /** Path of the input file. */
+   String filename;
+   /** Path of the corresponding output file. */
+   String outname;
+
+   public WorkItem(String absolutePathIn, String absolutePathOut) {
+     this.filename = absolutePathIn;
+     this.outname = absolutePathOut;
+   }
+ }
+
+ private ArrayList<WorkItem> mWorkList;
+
+ private String mInputdirectory;
+
+ private String mOutputdirectory;
+
+ private Boolean mIgnorePrevious;
+
+ private int mCurrentIndex;
+
+ private Boolean mSendToLast;
+
+ private Boolean mSendToAll;
+
+ private int mPreviouslyDone;
+
+ private Logger logger;
+
+ /**
+  * Reads the configuration parameters, validates the input and output
+  * locations, and builds the list of *.zip files to hand out as work items.
+  * Optional Boolean parameters that are not set default to false.
+  *
+  * @throws ResourceInitializationException if the input path is not an
+  *         existing directory, or the output path exists but is not a directory
+  * @see org.apache.uima.collection.CollectionReader_ImplBase#initialize()
+  */
+ public void initialize() throws ResourceInitializationException {
+   logger = getUimaContext().getLogger();
+   String inputSpec = ((String) getConfigParameterValue(PARAM_INPUTSPEC)).trim();
+   mOutputdirectory = ((String) getConfigParameterValue(PARAM_OUTPUTDIR)).trim();
+   mIgnorePrevious = (Boolean) getConfigParameterValue(PARAM_IGNOREPREVIOUS);
+   mSendToLast = (Boolean) getConfigParameterValue(PARAM_SENDTOLAST);
+   mSendToAll = (Boolean) getConfigParameterValue(PARAM_SENDTOALL);
+
+   if (null == mIgnorePrevious) {
+     mIgnorePrevious = Boolean.FALSE;
+   }
+   if (null == mSendToLast) {
+     mSendToLast = Boolean.FALSE;
+   }
+   if (null == mSendToAll) {
+     mSendToAll = Boolean.FALSE;
+   }
+   mCurrentIndex = 0;
+   mPreviouslyDone = 0;
+
+   mInputdirectory = inputSpec;
+   // if input directory does not exist or is not a directory, throw exception
+   File inDirectory = new File(mInputdirectory);
+   if (!inDirectory.exists() || !inDirectory.isDirectory()) {
+     throw new ResourceInitializationException(new RuntimeException( mInputdirectory+" is not a directory"));
+   }
+
+   // if output directory is a file throw exception
+   File outDirectory = new File(mOutputdirectory);
+   if (outDirectory.exists() && !outDirectory.isDirectory()) {
+     throw new ResourceInitializationException(new RuntimeException("Specified output directory "+mOutputdirectory+" is a file"));
+   }
+
+   logger.log(Level.INFO, "Processing *.zip files in "+mInputdirectory);
+   // mIgnorePrevious was defaulted above, so it cannot be null here
+   if (mIgnorePrevious) {
+     logger.log(Level.INFO, "Overwriting previous outfiles");
+   }
+
+   // get list of files in the specified directory
+   mWorkList = new ArrayList<WorkItem>();
+   addFilesFromDir(inDirectory, "zip");
+   if (0 < mPreviouslyDone) {
+     logger.log(Level.INFO, "Preserving "+mPreviouslyDone+" output files in "+mOutputdirectory);
+   }
+   logger.log(Level.INFO, "Processing "+mWorkList.size()+" output files in "+mOutputdirectory);
+ }
+
+ /**
+  * Adds to the work list every regular file directly inside the given
+  * directory whose name ends with the given extension, provided that the
+  * respective output file does not exist or mIgnorePrevious = true.
+  * Existing output files that are skipped are counted in mPreviouslyDone.
+  * The output file has the same name as the input file and is located in
+  * the configured output directory.
+  *
+  * @param dir the input directory to scan (subdirectories are not visited)
+  * @param ext the required file extension, without the leading dot
+  * @throws IllegalStateException if the directory cannot be listed
+  */
+ private void addFilesFromDir(File dir, String ext) {
+   File[] files = dir.listFiles();
+   if (files == null) {
+     // listFiles() returns null on I/O error or when dir is not readable
+     throw new IllegalStateException("Cannot list files in "+dir.getAbsolutePath());
+   }
+   for (File file : files) {
+     if (file.isDirectory() || !file.getName().endsWith("."+ext)) {
+       continue;
+     }
+     // Build the output path from the file name rather than by cutting the
+     // input directory prefix off the absolute path; the prefix approach
+     // breaks for relative or trailing-slash input directories.
+     String outfilename = new File(mOutputdirectory, file.getName()).getPath();
+     File outFile = new File(outfilename);
+     if (!mIgnorePrevious && outFile.exists()) {
+       mPreviouslyDone++;
+       continue;
+     }
+     mWorkList.add(new WorkItem(file.getAbsolutePath(),outfilename));
+     logger.log(Level.FINE, "adding "+outfilename);
+   }
+ }
+
+ /**
+  * Reports whether any work item in the list has not been handed out yet.
+  *
+  * @return true while the current index is below the work list size
+  * @see org.apache.uima.collection.CollectionReader#hasNext()
+  */
+ public boolean hasNext() {
+   final int remaining = mWorkList.size() - mCurrentIndex;
+   return remaining > 0;
+ }
+
+ /**
+  * Fills the given CAS with a Workitem describing the next input/output
+  * file pair, sets the input path as the document text, and advances to
+  * the following work item.
+  *
+  * @param aCAS the CAS to populate
+  * @throws CollectionException if the JCas view cannot be obtained
+  * @see org.apache.uima.collection.CollectionReader#getNext(org.apache.uima.cas.CAS)
+  */
+ public void getNext(CAS aCAS) throws IOException, CollectionException {
+   final JCas view;
+   try {
+     view = aCAS.getJCas();
+   } catch (CASException casFailure) {
+     throw new CollectionException(casFailure);
+   }
+
+   // create WorkItem info structure
+   final Workitem wi = new Workitem(view);
+   final WorkItem current = mWorkList.get(mCurrentIndex);
+   wi.setInputspec(current.filename);
+   wi.setOutputspec(current.outname);
+   wi.setSendToLast(mSendToLast);
+   wi.setSendToAll(mSendToAll);
+   wi.addToIndexes();
+   logger.log(Level.INFO, "Sending "+wi.getInputspec());
+   mCurrentIndex++;
+   view.setDocumentText(wi.getInputspec());
+ }
+
+ /**
+  * Does nothing; this method body is empty.
+  *
+  * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#close()
+  */
+ public void close() throws IOException {
+   // intentionally empty
+ }
+
+ /**
+  * Reports progress as the number of work items handed out so far against
+  * the total number of work items, measured in entities.
+  *
+  * @return a single-element array holding that progress value
+  * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#getProgress()
+  */
+ public Progress[] getProgress() {
+   final Progress handedOut = new ProgressImpl(mCurrentIndex, mWorkList.size(), Progress.ENTITIES);
+   final Progress[] report = new Progress[1];
+   report[0] = handedOut;
+   return report;
+ }
+
+ /**
+  * Returns the size of the work list, i.e. the total number of work items
+  * this collection reader will hand out. This is not part of the general
+  * collection reader interface.
+  *
+  * @return the number of entries in the work list
+  */
+ public int getNumberOfDocuments() {
+   final int total = mWorkList.size();
+   return total;
+ }
+
+}
Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCC.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCC.xml?rev=1527553&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCC.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCC.xml Mon Sep 30 13:33:32 2013
@@ -0,0 +1,77 @@
+<?xml version="1.0" encoding="UTF-8"?><analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+
+ <!--
+ ***************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ***************************************************************
+ -->
+
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive>
+ <annotatorImplementationName>org.apache.uima.ducc.sampleapps.DuccCasCC</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>DuccCasCC</name>
+ <description/>
+ <version>1.0</version>
+ <vendor/>
+ <configurationParameters>
+ <configurationParameter>
+ <name>XmiCompressionLevel</name>
+ <type>Integer</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>UseBinarySerialization</name>
+ <description>Enable use of UIMA compressed binary CAS format</description>
+ <type>Boolean</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings>
+ <nameValuePair>
+ <name>XmiCompressionLevel</name>
+ <value>
+ <integer>7</integer>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+ <typeSystemDescription>
+ <imports>
+ <import name="org.apache.uima.ducc.common.uima.DuccJobFlowControlTS"/>
+ <import name="org.apache.uima.ducc.sampleapps.DuccDocumentInfoTS"/>
+ </imports>
+ </typeSystemDescription>
+ <typePriorities/>
+ <fsIndexCollection/>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs/>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>false</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+ <resourceManagerConfiguration/>
+</analysisEngineDescription>
\ No newline at end of file
Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCM.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCM.xml?rev=1527553&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCM.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccCasCM.xml Mon Sep 30 13:33:32 2013
@@ -0,0 +1,57 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+ <!--
+ ***************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ***************************************************************
+ -->
+
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <primitive>true</primitive>
+ <annotatorImplementationName>org.apache.uima.ducc.sampleapps.DuccCasCM</annotatorImplementationName>
+ <analysisEngineMetaData>
+ <name>DuccCasCM</name>
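+ <description>Reads the zip file named by the Workitem input spec and outputs one CAS for each serialized CAS entry (XMI or compressed binary) it contains.</description>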
+ <version>1.0</version>
+ <vendor/>
+ <configurationParameters/>
+ <configurationParameterSettings/>
+ <typeSystemDescription>
+ <imports>
+ <import name="org.apache.uima.ducc.common.uima.DuccJobFlowControlTS"/>
+ <import name="org.apache.uima.ducc.sampleapps.DuccDocumentInfoTS"/>
+ </imports>
+ </typeSystemDescription>
+ <typePriorities/>
+ <fsIndexCollection/>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs/>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+ <outputsNewCASes>true</outputsNewCASes>
+ </operationalProperties>
+ </analysisEngineMetaData>
+ <resourceManagerConfiguration/>
+</analysisEngineDescription>
Added: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobCasCR.xml
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobCasCR.xml?rev=1527553&view=auto
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobCasCR.xml (added)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/resources/org/apache/uima/ducc/sampleapps/DuccJobCasCR.xml Mon Sep 30 13:33:32 2013
@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="UTF-8"?><collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+
+ <!--
+ ***************************************************************
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ ***************************************************************
+ -->
+
+ <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+ <implementationName>org.apache.uima.ducc.sampleapps.DuccJobCasCR</implementationName>
+ <processingResourceMetaData>
+ <name>DuccJobCasCR</name>
+ <description>Generates CASes with reference to input.</description>
+ <version>1.0</version>
+ <vendor>Apache UIMA</vendor>
+ <configurationParameters>
+ <configurationParameter>
+ <name>InputSpec</name>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>OutputDirectory</name>
+ <description>The base output directory</description>
+ <type>String</type>
+ <multiValued>false</multiValued>
+ <mandatory>true</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>IgnorePreviousOutput</name>
+ <type>Boolean</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>SendToLast</name>
+ <type>Boolean</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ <configurationParameter>
+ <name>SendToAll</name>
+ <type>Boolean</type>
+ <multiValued>false</multiValued>
+ <mandatory>false</mandatory>
+ </configurationParameter>
+ </configurationParameters>
+ <configurationParameterSettings>
+ <nameValuePair>
+ <name>InputSpec</name>
+ <value>
+ <string>/tmp</string>
+ </value>
+ </nameValuePair>
+ <nameValuePair>
+ <name>OutputDirectory</name>
+ <value>
+ <string>/tmp</string>
+ </value>
+ </nameValuePair>
+ </configurationParameterSettings>
+ <typeSystemDescription>
+ <imports>
+ <import name="org.apache.uima.ducc.common.uima.DuccJobFlowControlTS"/>
+ </imports>
+ </typeSystemDescription>
+ <typePriorities/>
+ <fsIndexCollection/>
+ <capabilities>
+ <capability>
+ <inputs/>
+ <outputs/>
+ <languagesSupported/>
+ </capability>
+ </capabilities>
+ <operationalProperties>
+ <modifiesCas>true</modifiesCas>
+ <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+ <outputsNewCASes>true</outputsNewCASes>
+ </operationalProperties>
+ </processingResourceMetaData>
+</collectionReaderDescription>
\ No newline at end of file