You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by ea...@apache.org on 2013/12/02 22:14:18 UTC
svn commit: r1547193 - in
/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps:
DuccCasCC.java DuccCasCM.java DuccJobCasCR.java DuccJobTextCR.java
DuccSampleAE.java DuccTextCM.java
Author: eae
Date: Mon Dec 2 21:14:17 2013
New Revision: 1547193
URL: http://svn.apache.org/r1547193
Log:
UIMA-3461 Add class comments for sample code
Modified:
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccSampleAE.java
uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java?rev=1547193&r1=1547192&r2=1547193&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCC.java Mon Dec 2 21:14:17 2013
@@ -19,6 +19,18 @@
package org.apache.uima.ducc.sampleapps;
+/*
+ * This sample Cas Consumer is designed to create an output zip file for each Work Item.
+ * The CAS compression format is selectable as either ZIP compressed XmiCas or UIMA
+ * compressed binary form 6 format. When compressed binary is used, each zip file also
+ * contains the full UIMA Type System in ZIP compressed text.
+ * CASes in UIMA compressed binary form 6 format have the same flexibility as an XmiCas
+ * in that they can be deserialized into a CAS with a different, but compatible Type System.
+ *
+ * See more information in DUCC Book chapters on sample applications.
+ *
+ */
+
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java?rev=1547193&r1=1547192&r2=1547193&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccCasCM.java Mon Dec 2 21:14:17 2013
@@ -19,6 +19,15 @@
package org.apache.uima.ducc.sampleapps;
+/*
+ * This sample Cas Multiplier reads compressed CASes from a specified zipfile
+ * and returns each as a child CAS. A zipfile may contain zip-compressed XMI
+ * format CASes or UIMA compressed binary form 6 format CASes.
+ *
+ * See more information in DUCC Book chapters on sample applications.
+ *
+ */
+
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java?rev=1547193&r1=1547192&r2=1547193&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobCasCR.java Mon Dec 2 21:14:17 2013
@@ -36,8 +36,11 @@ import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
/**
- * A DUCC Job collection reader that reads text files from a directory in the filesystem.
- * It can be configured with the following parameters:
+ * A DUCC Job collection reader that returns a Work Item CAS for each zipfile found in
+ * a directory. By default any previously completed output files found in the output
+ * directory are preserved, but a configuration parameter is available to ignore previous output.
+ *
+ * This CR can be configured with the following parameters:
* <ul>
* <li><code>InputSpec</code> - path to directory containing input *.zip files</li>
* <li><code>OutputDirectory</code> - path to directory for output files</li>
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java?rev=1547193&r1=1547192&r2=1547193&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccJobTextCR.java Mon Dec 2 21:14:17 2013
@@ -37,8 +37,15 @@ import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
/**
- * A simple DUCC Job collection reader that reads text files from a directory in the filesystem.
- * It can be configured with the following parameters:
+ * A DUCC Job collection reader that segments a collection of text files from a directory
+ * in the filesystem into separate Work Items for a DUCC job.
+ * In order to facilitate processing scale out, an optional blocksize parameter can be
+ * specified that will be used to break larger files into multiple Work Items.
+ *
+ * By default any previously completed output files found in the output directory are preserved,
+ * but a configuration parameter is available to ignore previous output.
+ *
+ * This CR can be configured with the following parameters:
* <ul>
* <li><code>InputDirectory</code> - path to directory containing input files</li>
* <li><code>OutputDirectory</code> - path to directory for output files</li>
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccSampleAE.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccSampleAE.java?rev=1547193&r1=1547192&r2=1547193&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccSampleAE.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccSampleAE.java Mon Dec 2 21:14:17 2013
@@ -19,6 +19,15 @@
package org.apache.uima.ducc.sampleapps;
+/*
+ * This sample Analysis Engine is a no-op annotator. It is used to
+ * demonstrate DUCC's overhead when processing a collection of compressed
+ * CASes.
+ *
+ * See more information in DUCC Book chapters on sample applications.
+ *
+ */
+
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.jcas.JCas;
Modified: uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java
URL: http://svn.apache.org/viewvc/uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java?rev=1547193&r1=1547192&r2=1547193&view=diff
==============================================================================
--- uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java (original)
+++ uima/sandbox/uima-ducc/trunk/uima-ducc-examples/src/main/java/org/apache/uima/ducc/sampleapps/DuccTextCM.java Mon Dec 2 21:14:17 2013
@@ -19,6 +19,17 @@
package org.apache.uima.ducc.sampleapps;
+/*
+ * This sample Cas Multiplier uses paragraph boundaries to segment a text file,
+ * or a part of a text file, into multiple documents. A child CAS is created
+ * for each document. Paragraphs that cross block boundaries are processed
+ * in the block where they started. An error is thrown if a paragraph crosses
+ * two block boundaries.
+ *
+ * See more information in DUCC Book chapters on sample applications.
+ *
+ */
+
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;